File: include/llvm/CodeGen/TargetLowering.h
Warning: line 1170, column 9: Called C++ object pointer is null
//===- ARMTargetTransformInfo.cpp - ARM specific TTI ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ARMTargetTransformInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "armtti"

bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
                                     const Function *Callee) const {
  const TargetMachine &TM = getTLI()->getTargetMachine();
  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  // To inline a callee, all features not in the whitelist must match exactly.
  bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) ==
                    (CalleeBits & ~InlineFeatureWhitelist);
  // For features in the whitelist, the callee's features must be a subset of
  // the caller's.
  bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) ==
                     (CalleeBits & InlineFeatureWhitelist);
  return MatchExact && MatchSubset;
}
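
// Illustrative note (not part of the original source): with a hypothetical
// whitelisted feature F and a non-whitelisted feature G, a callee built
// without F can be inlined into a caller built with F (the subset rule), but
// any caller/callee mismatch in G blocks inlining (the exact-match rule).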

int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned Bits = Ty->getPrimitiveSizeInBits();
  if (Bits == 0 || Imm.getActiveBits() >= 64)
    return 4;

  int64_t SImmVal = Imm.getSExtValue();
  uint64_t ZImmVal = Imm.getZExtValue();
  if (!ST->isThumb()) {
    if ((SImmVal >= 0 && SImmVal < 65536) ||
        (ARM_AM::getSOImmVal(ZImmVal) != -1) ||
        (ARM_AM::getSOImmVal(~ZImmVal) != -1))
      return 1;
    return ST->hasV6T2Ops() ? 2 : 3;
  }
  if (ST->isThumb2()) {
    if ((SImmVal >= 0 && SImmVal < 65536) ||
        (ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
        (ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
      return 1;
    return ST->hasV6T2Ops() ? 2 : 3;
  }
  // In Thumb1, any i8 immediate costs 1.
  if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
    return 1;
  if ((~SImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
    return 2;
  // Load from constantpool.
  return 3;
}
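
// A rough worked example (hypothetical values, assuming a Thumb2 subtarget
// with MOVW/MOVT): 255 and 65535 fit the 16-bit MOV range and cost 1;
// 0x12345678 matches no modified-immediate pattern and needs a MOVW/MOVT
// pair, costing 2; an immediate with 64 or more active bits falls back to
// the default cost of 4.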

// Constants smaller than 256 fit in the immediate field of
// Thumb1 instructions, so we return a cost of zero for them and 1 otherwise.
int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                      const APInt &Imm, Type *Ty) {
  if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
    return 0;

  return 1;
}

int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                              Type *Ty) {
  // Division by a constant can be turned into multiplication, but only if we
  // know it's constant. So it's not so much that the immediate is cheap (it's
  // not), but that the alternative is worse.
  // FIXME: this is probably unneeded with GlobalISel.
  if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
       Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
      Idx == 1)
    return 0;

  if (Opcode == Instruction::And) {
    // UXTB/UXTH
    if (Imm == 255 || Imm == 65535)
      return 0;
    // Conversion to BIC is free, and means we can use ~Imm instead.
    return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty));
  }

  if (Opcode == Instruction::Add)
    // Conversion to SUB is free, and means we can use -Imm instead.
    return std::min(getIntImmCost(Imm, Ty), getIntImmCost(-Imm, Ty));

  if (Opcode == Instruction::ICmp && Imm.isNegative() &&
      Ty->getIntegerBitWidth() == 32) {
    int64_t NegImm = -Imm.getSExtValue();
    if (ST->isThumb2() && NegImm < 1<<12)
      // icmp X, #-C -> cmn X, #C
      return 0;
    if (ST->isThumb() && NegImm < 1<<8)
      // icmp X, #-C -> adds X, #C
      return 0;
  }

  // xor a, -1 can always be folded to MVN
  if (Opcode == Instruction::Xor && Imm.isAllOnesValue())
    return 0;

  return getIntImmCost(Imm, Ty);
}
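
// For instance (illustrative, not from the original source): `and r, #255`
// lowers to UXTB, so the immediate is free; an AND with 0xFFFFFF00 is costed
// via the BIC form, which materializes ~Imm = 0xFF (cost 1) instead; and an
// add of -1 is costed as `sub r, #1`.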

int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                 const Instruction *I) {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  // Single to/from double precision conversions.
  static const CostTblEntry NEONFltDblTbl[] = {
    // Vector fptrunc/fpext conversions.
    { ISD::FP_ROUND,  MVT::v2f64, 2 },
    { ISD::FP_EXTEND, MVT::v2f32, 2 },
    { ISD::FP_EXTEND, MVT::v4f32, 4 }
  };

  if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
                                             ISD == ISD::FP_EXTEND)) {
    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
    if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
      return LT.first * Entry->Cost;
  }

  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())
    return BaseT::getCastInstrCost(Opcode, Dst, Src);

  // Some arithmetic, load and store operations have specific instructions
  // to cast up/down their types automatically at no extra cost.
  // TODO: Get these tables to know at least what the related operations are.
  static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = {
    { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
    { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
    { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
    { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
    { ISD::TRUNCATE,    MVT::v4i32, MVT::v4i64, 0 },
    { ISD::TRUNCATE,    MVT::v4i16, MVT::v4i32, 1 },

    // The number of vmovl instructions for the extension.
    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
    { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
    { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
    { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
    { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
    { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
    { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
    { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
    { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
    { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },

    // Operations that we legalize using splitting.
    { ISD::TRUNCATE,    MVT::v16i8, MVT::v16i32, 6 },
    { ISD::TRUNCATE,    MVT::v8i8, MVT::v8i32, 3 },

    // Vector float <-> i32 conversions.
    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i32, 1 },
    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i32, 1 },

    { ISD::SINT_TO_FP,  MVT::v2f32, MVT::v2i8, 3 },
    { ISD::UINT_TO_FP,  MVT::v2f32, MVT::v2i8, 3 },
    { ISD::SINT_TO_FP,  MVT::v2f32, MVT::v2i16, 2 },
    { ISD::UINT_TO_FP,  MVT::v2f32, MVT::v2i16, 2 },
    { ISD::SINT_TO_FP,  MVT::v2f32, MVT::v2i32, 1 },
    { ISD::UINT_TO_FP,  MVT::v2f32, MVT::v2i32, 1 },
    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i1, 3 },
    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i1, 3 },
    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i8, 3 },
    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i8, 3 },
    { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i16, 2 },
    { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i16, 2 },
    { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i16, 4 },
    { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i16, 4 },
    { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i32, 2 },
    { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i32, 2 },
    { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i16, 8 },
    { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i16, 8 },
    { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i32, 4 },
    { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i32, 4 },

    { ISD::FP_TO_SINT,  MVT::v4i32, MVT::v4f32, 1 },
    { ISD::FP_TO_UINT,  MVT::v4i32, MVT::v4f32, 1 },
    { ISD::FP_TO_SINT,  MVT::v4i8, MVT::v4f32, 3 },
    { ISD::FP_TO_UINT,  MVT::v4i8, MVT::v4f32, 3 },
    { ISD::FP_TO_SINT,  MVT::v4i16, MVT::v4f32, 2 },
    { ISD::FP_TO_UINT,  MVT::v4i16, MVT::v4f32, 2 },

    // Vector double <-> i32 conversions.
    { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },
    { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },

    { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i8, 4 },
    { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i8, 4 },
    { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i16, 3 },
    { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i16, 3 },
    { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },
    { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },

    { ISD::FP_TO_SINT,  MVT::v2i32, MVT::v2f64, 2 },
    { ISD::FP_TO_UINT,  MVT::v2i32, MVT::v2f64, 2 },
    { ISD::FP_TO_SINT,  MVT::v8i16, MVT::v8f32, 4 },
    { ISD::FP_TO_UINT,  MVT::v8i16, MVT::v8f32, 4 },
    { ISD::FP_TO_SINT,  MVT::v16i16, MVT::v16f32, 8 },
    { ISD::FP_TO_UINT,  MVT::v16i16, MVT::v16f32, 8 }
  };

  if (SrcTy.isVector() && ST->hasNEON()) {
    if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
                                                   DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return Entry->Cost;
  }

  // Scalar float to integer conversions.
  static const TypeConversionCostTblEntry NEONFloatConversionTbl[] = {
    { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
    { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
    { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
    { ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 },
    { ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 },
    { ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 },
    { ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 },
    { ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 },
    { ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 },
    { ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 },
    { ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 },
    { ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 },
    { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 },
    { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 },
    { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 },
    { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 },
    { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 },
    { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 },
    { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 },
    { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
  };
  if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
    if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
                                                   DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return Entry->Cost;
  }

  // Scalar integer to float conversions.
  static const TypeConversionCostTblEntry NEONIntegerConversionTbl[] = {
    { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
    { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
    { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
    { ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 },
    { ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 },
    { ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 },
    { ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 },
    { ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 },
    { ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 },
    { ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 },
    { ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 },
    { ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 },
    { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 },
    { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 },
    { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 },
    { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 },
    { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 },
    { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 },
    { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 },
    { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 }
  };

  if (SrcTy.isInteger() && ST->hasNEON()) {
    if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl,
                                                   ISD, DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return Entry->Cost;
  }

  // Scalar integer conversion costs.
  static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = {
    // i16 -> i64 requires two dependent operations.
    { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },

    // Truncates on i64 are assumed to be free.
    { ISD::TRUNCATE,    MVT::i32, MVT::i64, 0 },
    { ISD::TRUNCATE,    MVT::i16, MVT::i64, 0 },
    { ISD::TRUNCATE,    MVT::i8,  MVT::i64, 0 },
    { ISD::TRUNCATE,    MVT::i1,  MVT::i64, 0 }
  };

  if (SrcTy.isInteger()) {
    if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
                                                   DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return Entry->Cost;
  }

  return BaseT::getCastInstrCost(Opcode, Dst, Src);
}

int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                   unsigned Index) {
  // Penalize inserting into a D-subregister. We end up with a three times
  // lower estimated throughput on Swift.
  if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
      ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)
    return 3;

  if ((Opcode == Instruction::InsertElement ||
       Opcode == Instruction::ExtractElement)) {
    // Cross-class copies are expensive on many microarchitectures,
    // so assume they are expensive by default.
    if (ValTy->getVectorElementType()->isIntegerTy())
      return 3;

    // Even if it's not a cross-class copy, this likely leads to mixing
    // of NEON and VFP code and should therefore be penalized.
    if (ValTy->isVectorTy() &&
        ValTy->getScalarSizeInBits() <= 32)
      return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U);
  }

  return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
}
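
// Quick illustration (hypothetical): extracting an i32 lane from <4 x i32>
// returns the flat cross-class-copy cost of 3, while extracting a float lane
// from <4 x float> is clamped to at least 2 because it still mixes NEON and
// VFP code.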

int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                   const Instruction *I) {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // On NEON a vector select gets lowered to vbsl.
  if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
    // Lowering of some vector selects is currently far from perfect.
    static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = {
      { ISD::SELECT, MVT::v4i1,  MVT::v4i64,  4*4 + 1*2 + 1 },
      { ISD::SELECT, MVT::v8i1,  MVT::v8i64,  50 },
      { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
    };

    EVT SelCondTy = TLI->getValueType(DL, CondTy);
    EVT SelValTy = TLI->getValueType(DL, ValTy);
    if (SelCondTy.isSimple() && SelValTy.isSimple()) {
      if (const auto *Entry = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
                                                     SelCondTy.getSimpleVT(),
                                                     SelValTy.getSimpleVT()))
        return Entry->Cost;
    }

    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
    return LT.first;
  }

  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}

int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                          const SCEV *Ptr) {
  // Address computations in vectorized code with non-consecutive addresses will
  // likely result in more instructions compared to scalar code where the
  // computation can more often be merged into the index mode. The resulting
  // extra micro-ops can significantly decrease throughput.
  unsigned NumVectorInstToHideOverhead = 10;
  int MaxMergeDistance = 64;

  if (Ty->isVectorTy() && SE &&
      !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
    return NumVectorInstToHideOverhead;

  // In many cases the address computation is not merged into the instruction
  // addressing mode.
  return 1;
}

int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
  const MemCpyInst *MI = dyn_cast<MemCpyInst>(I);
  assert(MI && "MemcpyInst expected");
  ConstantInt *C = dyn_cast<ConstantInt>(MI->getLength());

  // To model the cost of a library call, we assume 1 for the call, and
  // 3 for the argument setup.
  const unsigned LibCallCost = 4;

  // If 'size' is not a constant, a library call will be generated.
  if (!C)
    return LibCallCost;

  const unsigned Size = C->getValue().getZExtValue();
  const unsigned DstAlign = MI->getDestAlignment();
  const unsigned SrcAlign = MI->getSourceAlignment();
  const Function *F = I->getParent()->getParent();
  const unsigned Limit = TLI->getMaxStoresPerMemmove(F->hasMinSize());
  std::vector<EVT> MemOps;

  // MemOps will be populated with a list of data types that need to be
  // loaded and stored. That's why we multiply the number of elements by 2 to
  // get the cost for this memcpy.
  if (getTLI()->findOptimalMemOpLowering(
          MemOps, Limit, Size, DstAlign, SrcAlign, false /*IsMemset*/,
          false /*ZeroMemset*/, false /*MemcpyStrSrc*/, false /*AllowOverlap*/,
          MI->getDestAddressSpace(), MI->getSourceAddressSpace(),
          F->getAttributes()))
    return MemOps.size() * 2;

  // If we can't find an optimal memop lowering, return the default cost.
  return LibCallCost;
}
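
// Rough example (hypothetical numbers, target dependent): a memcpy of 16
// bytes with 4-byte-aligned operands might be lowered as four i32 load/store
// pairs, giving MemOps.size() == 4 and a cost of 8, whereas a memcpy whose
// length is only known at run time is costed as a library call (cost 4).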

int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                               Type *SubTp) {
  if (Kind == TTI::SK_Broadcast) {
    static const CostTblEntry NEONDupTbl[] = {
        // VDUP handles these cases.
        {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},

        {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}};

    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);

    if (const auto *Entry = CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE,
                                            LT.second))
      return LT.first * Entry->Cost;

    return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
  }
  if (Kind == TTI::SK_Reverse) {
    static const CostTblEntry NEONShuffleTbl[] = {
        // A reverse shuffle costs one instruction if we are shuffling within a
        // double word (vrev) or two if we shuffle a quad word (vrev, vext).
        {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},

        {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
        {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
        {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
        {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};

    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);

    if (const auto *Entry = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE,
                                            LT.second))
      return LT.first * Entry->Cost;

    return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
  }
  if (Kind == TTI::SK_Select) {
    static const CostTblEntry NEONSelShuffleTbl[] = {
        // Select shuffle cost table for ARM. Cost is the number of
        // instructions required to create the shuffled vector.

        {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
        {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},

        {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
        {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
        {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2},

        {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16},

        {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};

    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
    if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl,
                                            ISD::VECTOR_SHUFFLE, LT.second))
      return LT.first * Entry->Cost;
    return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
  }
  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
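
// For example (illustrative): an SK_Broadcast of <4 x float> hits the VDUP
// table at cost 1, while an SK_Reverse of <16 x i8> needs a vrev plus vext
// and costs 2.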

int ARMTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
    TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
    TTI::OperandValueProperties Opd2PropInfo,
    ArrayRef<const Value *> Args) {
  int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

  const unsigned FunctionCallDivCost = 20;
  const unsigned ReciprocalDivCost = 10;
  static const CostTblEntry CostTbl[] = {
    // Division.
    // These costs are somewhat random. Choose a cost of 20 to indicate that
    // vectorizing division (added function call) is going to be very expensive.
    // Double register types.
    { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
    { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
    { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
    { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
    { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v4i16,     ReciprocalDivCost},
    { ISD::UDIV, MVT::v4i16,     ReciprocalDivCost},
    { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
    { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v8i8,      ReciprocalDivCost},
    { ISD::UDIV, MVT::v8i8,      ReciprocalDivCost},
    { ISD::SREM, MVT::v8i8,  8 * FunctionCallDivCost},
    { ISD::UREM, MVT::v8i8,  8 * FunctionCallDivCost},
    // Quad register types.
    { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
    { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
    { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
    { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
    { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
    { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
    { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
    { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
    { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
    // Multiplication.
  };

  if (ST->hasNEON())
    if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
      return LT.first * Entry->Cost;

  int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
                                           Opd1PropInfo, Opd2PropInfo);

  // This is somewhat of a hack. The problem that we are facing is that SROA
  // creates a sequence of shift, and, or instructions to construct values.
  // These sequences are recognized by ISel and have zero cost. Not so for
  // the vectorized code. Because we have support for v2i64 but not i64 those
  // sequences look particularly beneficial to vectorize.
  // To work around this we increase the cost of v2i64 operations to make them
  // seem less beneficial.
  if (LT.second == MVT::v2i64 &&
      Op2Info == TargetTransformInfo::OK_UniformConstantValue)
    Cost += 4;

  return Cost;
}
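
// For example (illustrative): a udiv of <4 x i32> hits the cost table at
// 4 * FunctionCallDivCost = 80, strongly discouraging vectorization, while a
// mul of <2 x i64> by a uniform constant misses the table and gets the base
// cost plus the v2i64 penalty of 4.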

int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                                unsigned AddressSpace, const Instruction *I) {
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);

  if (Src->isVectorTy() && Alignment != 16 &&
      Src->getVectorElementType()->isDoubleTy()) {
    // Unaligned loads/stores are extremely inefficient.
    // We need 4 uops for vst.1/vld.1 vs 1 uop for vldr/vstr.
    return LT.first * 4;
  }
  return LT.first;
}
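
// E.g. (illustrative): a load of <2 x double> with 8-byte alignment costs
// 4 (the vld1 path), while the same load with 16-byte alignment costs 1.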

int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                           unsigned Factor,
                                           ArrayRef<unsigned> Indices,
                                           unsigned Alignment,
                                           unsigned AddressSpace,
                                           bool UseMaskForCond,
                                           bool UseMaskForGaps) {
  assert(Factor >= 2 && "Invalid interleave factor");
  assert(isa<VectorType>(VecTy) && "Expect a vector type");

  // vldN/vstN don't support vector types with i64/f64 elements.
  bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;

  if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
      !UseMaskForCond && !UseMaskForGaps) {
    unsigned NumElts = VecTy->getVectorNumElements();
    auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);

    // vldN/vstN only support legal vector types of size 64 or 128 in bits.
    // Accesses having vector types that are a multiple of 128 bits can be
    // matched to more than one vldN/vstN instruction.
    if (NumElts % Factor == 0 &&
        TLI->isLegalInterleavedAccessType(SubVecTy, DL))
      return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
  }

  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace,
                                           UseMaskForCond, UseMaskForGaps);
}
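
// For instance (illustrative): an interleaved load of <8 x i32> with
// Factor == 2 splits into a <4 x i32> subvector per stream; that is a single
// legal 128-bit access, so the cost is 2 * 1 = 2, modeling one vld2.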

void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                         TTI::UnrollingPreferences &UP) {
  // Currently only enable these preferences for M-Class cores.
  if (!ST->isMClass())
    return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP);

  // Disable loop unrolling for Oz and Os.
  UP.OptSizeThreshold = 0;
  UP.PartialOptSizeThreshold = 0;
  if (L->getHeader()->getParent()->hasOptSize())
    return;

  // Only enable on Thumb-2 targets.
  if (!ST->isThumb2())
    return;

  SmallVector<BasicBlock*, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  LLVM_DEBUG(dbgs() << "Loop has:\n"
                    << "Blocks: " << L->getNumBlocks() << "\n"
                    << "Exit blocks: " << ExitingBlocks.size() << "\n");

  // Allow at most one exiting block other than the latch. This acts as an
  // early exit, as it mirrors the profitability calculation of the runtime
  // unroller.
  if (ExitingBlocks.size() > 2)
    return;

  // Limit the CFG of the loop body for targets with a branch predictor.
  // Allowing 4 blocks permits if-then-else diamonds in the body.
  if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
    return;

  // Scan the loop: don't unroll loops with calls as this could prevent
  // inlining.
  unsigned Cost = 0;
  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
        ImmutableCallSite CS(&I);
        if (const Function *F = CS.getCalledFunction()) {
          if (!isLoweredToCall(F))
            continue;
        }
        return;
      }
      SmallVector<const Value*, 4> Operands(I.value_op_begin(),
                                            I.value_op_end());
      Cost += getUserCost(&I, Operands);
    }
  }

  LLVM_DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");

  UP.Partial = true;
  UP.Runtime = true;
  UP.UnrollRemainder = true;
  UP.DefaultUnrollRuntimeCount = 4;
  UP.UnrollAndJam = true;
  UP.UnrollAndJamInnerLoopThreshold = 60;

  // Forcing unrolling of small loops can be very useful because of the
  // branch-taken cost of the backedge.
  if (Cost < 12)
    UP.Force = true;
}
//===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file provides a helper that implements much of the TTI interface in
/// terms of the target-independent code generator and TargetLowering
/// interfaces.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_BASICTTIIMPL_H
#define LLVM_CODEGEN_BASICTTIIMPL_H

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <limits>
#include <utility>

57 | |
58 | class Function; |
59 | class GlobalValue; |
60 | class LLVMContext; |
61 | class ScalarEvolution; |
62 | class SCEV; |
63 | class TargetMachine; |
64 | |
65 | extern cl::opt<unsigned> PartialUnrollingThreshold; |
66 | |
67 | /// Base class which can be used to help build a TTI implementation. |
68 | /// |
69 | /// This class provides as much implementation of the TTI interface as is |
70 | /// possible using the target independent parts of the code generator. |
71 | /// |
72 | /// In order to subclass it, your class must implement a getST() method to |
73 | /// return the subtarget, and a getTLI() method to return the target lowering. |
74 | /// We need these methods implemented in the derived class so that this class |
75 | /// doesn't have to duplicate storage for them. |
76 | template <typename T> |
77 | class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { |
78 | private: |
79 | using BaseT = TargetTransformInfoImplCRTPBase<T>; |
80 | using TTI = TargetTransformInfo; |
81 | |
82 | /// Estimate a cost of Broadcast as an extract and sequence of insert |
83 | /// operations. |
84 | unsigned getBroadcastShuffleOverhead(Type *Ty) { |
85 | assert(Ty->isVectorTy() && "Can only shuffle vectors")((Ty->isVectorTy() && "Can only shuffle vectors") ? static_cast<void> (0) : __assert_fail ("Ty->isVectorTy() && \"Can only shuffle vectors\"" , "/build/llvm-toolchain-snapshot-9~svn359999/include/llvm/CodeGen/BasicTTIImpl.h" , 85, __PRETTY_FUNCTION__)); |
86 | unsigned Cost = 0; |
87 | // Broadcast cost is equal to the cost of extracting the zero'th element |
88 | // plus the cost of inserting it into every element of the result vector. |
89 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
90 | Instruction::ExtractElement, Ty, 0); |
91 | |
92 | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { |
93 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
94 | Instruction::InsertElement, Ty, i); |
95 | } |
96 | return Cost; |
97 | } |
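
  // For example (illustrative): broadcasting <4 x float> with a per-lane
  // insert/extract cost of 1 is modeled as 1 extract + 4 inserts = 5.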

  /// Estimate a cost of shuffle as a sequence of extract and insert
  /// operations.
  unsigned getPermuteShuffleOverhead(Type *Ty) {
    assert(Ty->isVectorTy() && "Can only shuffle vectors");
    unsigned Cost = 0;
    // Shuffle cost is equal to the cost of extracting elements from its
    // arguments plus the cost of inserting them into the result vector.

    // e.g. a <4 x float> shuffle with a mask of <0,5,2,7> needs to extract
    // from index 0 of the first vector, index 1 of the second vector, index 2
    // of the first vector, and finally index 3 of the second vector, and
    // insert them at indices <0,1,2,3> of the result vector.
    for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
      Cost += static_cast<T *>(this)
                  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
      Cost += static_cast<T *>(this)
                  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
    }
    return Cost;
  }

  /// Estimate a cost of subvector extraction as a sequence of extract and
  /// insert operations.
  unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
    assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
           "Can only extract subvectors from vectors");
    int NumSubElts = SubTy->getVectorNumElements();
    assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
           "SK_ExtractSubvector index out of range");

    unsigned Cost = 0;
    // Subvector extraction cost is equal to the cost of extracting elements
    // from the source type plus the cost of inserting them into the result
    // vector type.
    for (int i = 0; i != NumSubElts; ++i) {
      Cost += static_cast<T *>(this)->getVectorInstrCost(
          Instruction::ExtractElement, Ty, i + Index);
      Cost += static_cast<T *>(this)->getVectorInstrCost(
          Instruction::InsertElement, SubTy, i);
    }
    return Cost;
  }

  /// Estimate a cost of subvector insertion as a sequence of extract and
  /// insert operations.
  unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
    assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
           "Can only insert subvectors into vectors");
    int NumSubElts = SubTy->getVectorNumElements();
    assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
           "SK_InsertSubvector index out of range");

    unsigned Cost = 0;
    // Subvector insertion cost is equal to the cost of extracting elements
    // from the subvector type plus the cost of inserting them into the result
    // vector type.
    for (int i = 0; i != NumSubElts; ++i) {
      Cost += static_cast<T *>(this)->getVectorInstrCost(
          Instruction::ExtractElement, SubTy, i);
      Cost += static_cast<T *>(this)->getVectorInstrCost(
          Instruction::InsertElement, Ty, i + Index);
    }
    return Cost;
  }

  /// Local query method delegates up to T which *must* implement this!
  const TargetSubtargetInfo *getST() const {
    return static_cast<const T *>(this)->getST();
  }

  /// Local query method delegates up to T which *must* implement this!
  const TargetLoweringBase *getTLI() const {
    return static_cast<const T *>(this)->getTLI();
  }

  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
    switch (M) {
    case TTI::MIM_Unindexed:
      return ISD::UNINDEXED;
    case TTI::MIM_PreInc:
      return ISD::PRE_INC;
    case TTI::MIM_PreDec:
      return ISD::PRE_DEC;
    case TTI::MIM_PostInc:
      return ISD::POST_INC;
    case TTI::MIM_PostDec:
      return ISD::POST_DEC;
    }
    llvm_unreachable("Unexpected MemIndexedMode");
  }

protected:
  explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
      : BaseT(DL) {}

  using TargetTransformInfoImplBase::DL;

public:
  /// \name Scalar TTI Implementations
  /// @{
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                      unsigned BitWidth, unsigned AddressSpace,
                                      unsigned Alignment, bool *Fast) const {
    EVT E = EVT::getIntegerVT(Context, BitWidth);
    return getTLI()->allowsMisalignedMemoryAccesses(E, AddressSpace, Alignment,
                                                    Fast);
  }

  bool hasBranchDivergence() { return false; }

  bool isSourceOfDivergence(const Value *V) { return false; }

  bool isAlwaysUniform(const Value *V) { return false; }

  unsigned getFlatAddressSpace() {
    // Return an invalid address space.
    return -1;
  }

  bool isLegalAddImmediate(int64_t imm) {
    return getTLI()->isLegalAddImmediate(imm);
  }

  bool isLegalICmpImmediate(int64_t imm) {
    return getTLI()->isLegalICmpImmediate(imm);
  }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace, Instruction *I = nullptr) {
    TargetLoweringBase::AddrMode AM;
    AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
                          const DataLayout &DL) const {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
                           const DataLayout &DL) const {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
  }

  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
  }

  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
    TargetLoweringBase::AddrMode AM;
    AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
  }

  bool isTruncateFree(Type *Ty1, Type *Ty2) {
    return getTLI()->isTruncateFree(Ty1, Ty2);
  }

  bool isProfitableToHoist(Instruction *I) {
    return getTLI()->isProfitableToHoist(I);
  }

  bool useAA() const { return getST()->useAA(); }

  bool isTypeLegal(Type *Ty) {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isTypeLegal(VT);
  }

  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) {
    return BaseT::getGEPCost(PointeeType, Ptr, Operands);
  }

  int getExtCost(const Instruction *I, const Value *Src) {
    if (getTLI()->isExtFree(I))
      return TargetTransformInfo::TCC_Free;

    if (isa<ZExtInst>(I) || isa<SExtInst>(I))
      if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
        if (getTLI()->isExtLoad(LI, I, DL))
          return TargetTransformInfo::TCC_Free;

    return TargetTransformInfo::TCC_Basic;
  }

  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<const Value *> Arguments, const User *U) {
    return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U);
  }

  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<Type *> ParamTys, const User *U) {
    if (IID == Intrinsic::cttz) {
      if (getTLI()->isCheapToSpeculateCttz())
        return TargetTransformInfo::TCC_Basic;
      return TargetTransformInfo::TCC_Expensive;
    }

    if (IID == Intrinsic::ctlz) {
      if (getTLI()->isCheapToSpeculateCtlz())
        return TargetTransformInfo::TCC_Basic;
      return TargetTransformInfo::TCC_Expensive;
    }

    return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JumpTableSize) {
    /// Try to find the estimated number of clusters. Note that the number of
    /// clusters identified in this function could be different from the actual
    /// numbers found in lowering. This function ignores switches that are
    /// lowered with a mix of jump table / bit test / BTree. It was initially
    /// intended to be used when estimating the cost of a switch in the inline
    /// cost heuristic, but it's a generic cost model to be used in other
    /// places (e.g., in loop unrolling).
    unsigned N = SI.getNumCases();
    const TargetLoweringBase *TLI = getTLI();
    const DataLayout &DL = this->getDataLayout();

    JumpTableSize = 0;
    bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());

    // Early exit if neither a jump table nor a bit test is allowed.
    if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
      return N;

    APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
    APInt MinCaseVal = MaxCaseVal;
    for (auto CI : SI.cases()) {
      const APInt &CaseVal = CI.getCaseValue()->getValue();
      if (CaseVal.sgt(MaxCaseVal))
        MaxCaseVal = CaseVal;
      if (CaseVal.slt(MinCaseVal))
        MinCaseVal = CaseVal;
    }

    // Check if suitable for a bit test.
    if (N <= DL.getIndexSizeInBits(0u)) {
      SmallPtrSet<const BasicBlock *, 4> Dests;
      for (auto I : SI.cases())
        Dests.insert(I.getCaseSuccessor());

      if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
                                     DL))
        return 1;
    }

    // Check if suitable for a jump table.
    if (IsJTAllowed) {
      if (N < 2 || N < TLI->getMinimumJumpTableEntries())
        return N;
      uint64_t Range =
          (MaxCaseVal - MinCaseVal)
              .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
      // Check whether a range of clusters is dense enough for a jump table.
      if (TLI->isSuitableForJumpTable(&SI, N, Range)) {
        JumpTableSize = Range;
        return 1;
      }
    }
    return N;
  }
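
  // Illustrative example (target dependent): a switch with 16 contiguous
  // cases over 0..15 on a target that allows jump tables would typically be
  // reported as a single cluster with JumpTableSize == 16, whereas 16 widely
  // scattered case values would be reported as 16 clusters.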
372 | |
373 | unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); } |
374 | |
375 | unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); } |
376 | |
377 | bool shouldBuildLookupTables() { |
378 | const TargetLoweringBase *TLI = getTLI(); |
379 | return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || |
380 | TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other); |
381 | } |
382 | |
383 | bool haveFastSqrt(Type *Ty) { |
384 | const TargetLoweringBase *TLI = getTLI(); |
385 | EVT VT = TLI->getValueType(DL, Ty); |
386 | return TLI->isTypeLegal(VT) && |
387 | TLI->isOperationLegalOrCustom(ISD::FSQRT, VT); |
388 | } |
389 | |
390 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { |
391 | return true; |
392 | } |
393 | |
394 | unsigned getFPOpCost(Type *Ty) { |
395 | // Check whether FADD is available, as a proxy for floating-point in |
396 | // general. |
397 | const TargetLoweringBase *TLI = getTLI(); |
398 | EVT VT = TLI->getValueType(DL, Ty); |
399 | if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT)) |
400 | return TargetTransformInfo::TCC_Basic; |
401 | return TargetTransformInfo::TCC_Expensive; |
402 | } |
403 | |
404 | unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) { |
405 | const TargetLoweringBase *TLI = getTLI(); |
406 | switch (Opcode) { |
407 | default: break; |
408 | case Instruction::Trunc: |
409 | if (TLI->isTruncateFree(OpTy, Ty)) |
410 | return TargetTransformInfo::TCC_Free; |
411 | return TargetTransformInfo::TCC_Basic; |
412 | case Instruction::ZExt: |
413 | if (TLI->isZExtFree(OpTy, Ty)) |
414 | return TargetTransformInfo::TCC_Free; |
415 | return TargetTransformInfo::TCC_Basic; |
416 | } |
417 | |
418 | return BaseT::getOperationCost(Opcode, Ty, OpTy); |
419 | } |
420 | |
421 | unsigned getInliningThresholdMultiplier() { return 1; } |
422 | |
423 | void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, |
424 | TTI::UnrollingPreferences &UP) { |
425 | // This unrolling functionality is target independent, but to provide some |
426 | // motivation for its intended use, for x86: |
427 | |
428 | // According to the Intel 64 and IA-32 Architectures Optimization Reference |
429 | // Manual, Intel Core models and later have a loop stream detector (and |
430 | // associated uop queue) that can benefit from partial unrolling. |
431 | // The relevant requirements are: |
432 | // - The loop must have no more than 4 (8 for Nehalem and later) branches |
433 | // taken, and none of them may be calls. |
434 | // - The loop can have no more than 18 (28 for Nehalem and later) uops. |
435 | |
436 | // According to the Software Optimization Guide for AMD Family 15h |
437 | // Processors, models 30h-4fh (Steamroller and later) have a loop predictor |
438 | // and loop buffer which can benefit from partial unrolling. |
439 | // The relevant requirements are: |
440 | // - The loop must have fewer than 16 branches |
441 | // - The loop must have less than 40 uops in all executed loop branches |
442 | |
443 | // The number of taken branches in a loop is hard to estimate here, and |
444 | // benchmarking has revealed that it is better not to be conservative when |
445 | // estimating the branch count. As a result, we'll ignore the branch limits |
446 | // until someone finds a case where it matters in practice. |
447 | |
448 | unsigned MaxOps; |
449 | const TargetSubtargetInfo *ST = getST(); |
450 | if (PartialUnrollingThreshold.getNumOccurrences() > 0) |
451 | MaxOps = PartialUnrollingThreshold; |
452 | else if (ST->getSchedModel().LoopMicroOpBufferSize > 0) |
453 | MaxOps = ST->getSchedModel().LoopMicroOpBufferSize; |
454 | else |
455 | return; |
456 | |
457 | // Scan the loop: don't unroll loops with calls. |
458 | for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; |
459 | ++I) { |
460 | BasicBlock *BB = *I; |
461 | |
462 | for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J) |
463 | if (isa<CallInst>(J) || isa<InvokeInst>(J)) { |
464 | ImmutableCallSite CS(&*J); |
465 | if (const Function *F = CS.getCalledFunction()) { |
466 | if (!static_cast<T *>(this)->isLoweredToCall(F)) |
467 | continue; |
468 | } |
469 | |
470 | return; |
471 | } |
472 | } |
473 | |
474 | // Enable runtime and partial unrolling up to the specified size. |
475 | // Enable using trip count upper bound to unroll loops. |
476 | UP.Partial = UP.Runtime = UP.UpperBound = true; |
477 | UP.PartialThreshold = MaxOps; |
478 | |
479 | // Avoid unrolling when optimizing for size. |
480 | UP.OptSizeThreshold = 0; |
481 | UP.PartialOptSizeThreshold = 0; |
482 | |
483 | // Set the number of instructions optimized away when the "back edge" |
484 | // becomes a "fall through" to the default value of 2. |
485 | UP.BEInsns = 2; |
486 | } |
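| // A worked example (hypothetical subtarget, for illustration): with no |
| // -partial-unrolling-threshold override and a sched model advertising |
| // LoopMicroOpBufferSize == 50, a call-free loop gets UP.Partial, |
| // UP.Runtime and UP.UpperBound enabled with UP.PartialThreshold == 50, |
| // while any loop containing a real call is left untouched. |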
487 | |
488 | int getInstructionLatency(const Instruction *I) { |
489 | if (isa<LoadInst>(I)) |
490 | return getST()->getSchedModel().DefaultLoadLatency; |
491 | |
492 | return BaseT::getInstructionLatency(I); |
493 | } |
494 | |
495 | /// @} |
496 | |
497 | /// \name Vector TTI Implementations |
498 | /// @{ |
499 | |
500 | unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; } |
501 | |
502 | unsigned getRegisterBitWidth(bool Vector) const { return 32; } |
503 | |
504 | /// Estimate the overhead of scalarizing an instruction. Insert and Extract |
505 | /// are set if the result needs to be inserted and/or extracted from vectors. |
506 | unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { |
507 | assert(Ty->isVectorTy() && "Can only scalarize vectors"); |
508 | unsigned Cost = 0; |
509 | |
510 | for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { |
511 | if (Insert) |
512 | Cost += static_cast<T *>(this) |
513 | ->getVectorInstrCost(Instruction::InsertElement, Ty, i); |
514 | if (Extract) |
515 | Cost += static_cast<T *>(this) |
516 | ->getVectorInstrCost(Instruction::ExtractElement, Ty, i); |
517 | } |
518 | |
519 | return Cost; |
520 | } |
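| // Worked example (assuming a unit cost per vector insert/extract): for |
| // Ty == <4 x float> with Insert == true and Extract == false, the loop |
| // above queries four InsertElement costs, so the overhead is 4; with |
| // both flags set it would be 8. |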
521 | |
522 | /// Estimate the overhead of scalarizing an instruction's unique |
523 | /// non-constant operands. The types of the arguments are ordinarily |
524 | /// scalar, in which case the costs are multiplied by VF. |
525 | unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, |
526 | unsigned VF) { |
527 | unsigned Cost = 0; |
528 | SmallPtrSet<const Value*, 4> UniqueOperands; |
529 | for (const Value *A : Args) { |
530 | if (!isa<Constant>(A) && UniqueOperands.insert(A).second) { |
531 | Type *VecTy = nullptr; |
532 | if (A->getType()->isVectorTy()) { |
533 | VecTy = A->getType(); |
534 | // If A is a vector operand, VF should be 1 or correspond to A. |
535 | assert((VF == 1 || VF == VecTy->getVectorNumElements()) && |
536 | "Vector argument does not match VF"); |
537 | } |
538 | else |
539 | VecTy = VectorType::get(A->getType(), VF); |
540 | |
541 | Cost += getScalarizationOverhead(VecTy, false, true); |
542 | } |
543 | } |
544 | |
545 | return Cost; |
546 | } |
547 | |
548 | unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) { |
549 | assert(VecTy->isVectorTy()); |
550 | |
551 | unsigned Cost = 0; |
552 | |
553 | Cost += getScalarizationOverhead(VecTy, true, false); |
554 | if (!Args.empty()) |
555 | Cost += getOperandsScalarizationOverhead(Args, |
556 | VecTy->getVectorNumElements()); |
557 | else |
558 | // When no information on arguments is provided, we add the cost |
559 | // associated with one argument as a heuristic. |
560 | Cost += getScalarizationOverhead(VecTy, false, true); |
561 | |
562 | return Cost; |
563 | } |
564 | |
565 | unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } |
566 | |
567 | unsigned getArithmeticInstrCost( |
568 | unsigned Opcode, Type *Ty, |
569 | TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, |
570 | TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, |
571 | TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, |
572 | TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, |
573 | ArrayRef<const Value *> Args = ArrayRef<const Value *>()) { |
574 | // Check if any of the operands are vector operands. |
575 | const TargetLoweringBase *TLI = getTLI(); |
576 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
577 | assert(ISD && "Invalid opcode"); |
578 | |
579 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); |
580 | |
581 | bool IsFloat = Ty->isFPOrFPVectorTy(); |
582 | // Assume that floating point arithmetic operations cost twice as much as |
583 | // integer operations. |
584 | unsigned OpCost = (IsFloat ? 2 : 1); |
585 | |
586 | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { |
587 | // The operation is legal. Assume it costs 1. |
588 | // TODO: Once we have extract/insert subvector cost we need to use them. |
589 | return LT.first * OpCost; |
590 | } |
591 | |
592 | if (!TLI->isOperationExpand(ISD, LT.second)) { |
593 | // If the operation is custom lowered, then assume that the code is twice |
594 | // as expensive. |
595 | return LT.first * 2 * OpCost; |
596 | } |
597 | |
598 | // Else, assume that we need to scalarize this op. |
599 | // TODO: If one of the types get legalized by splitting, handle this |
600 | // similarly to what getCastInstrCost() does. |
601 | if (Ty->isVectorTy()) { |
602 | unsigned Num = Ty->getVectorNumElements(); |
603 | unsigned Cost = static_cast<T *>(this) |
604 | ->getArithmeticInstrCost(Opcode, Ty->getScalarType()); |
605 | // Return the cost of multiple scalar invocations plus the cost of |
606 | // inserting and extracting the values. |
607 | return getScalarizationOverhead(Ty, Args) + Num * Cost; |
608 | } |
609 | |
610 | // We don't know anything about this scalar instruction. |
611 | return OpCost; |
612 | } |
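| // Worked example (hypothetical target with legal v2i64 adds, for |
| // illustration): an Add on <8 x i64> legalizes as LT == {4, v2i64}, so |
| // the legal path above returns 4 * OpCost == 4. If the opcode had to be |
| // scalarized instead, the result would be the scalarization overhead of |
| // <8 x i64> plus 8 scalar Add costs. |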
613 | |
614 | unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, |
615 | Type *SubTp) { |
616 | switch (Kind) { |
617 | case TTI::SK_Broadcast: |
618 | return getBroadcastShuffleOverhead(Tp); |
619 | case TTI::SK_Select: |
620 | case TTI::SK_Reverse: |
621 | case TTI::SK_Transpose: |
622 | case TTI::SK_PermuteSingleSrc: |
623 | case TTI::SK_PermuteTwoSrc: |
624 | return getPermuteShuffleOverhead(Tp); |
625 | case TTI::SK_ExtractSubvector: |
626 | return getExtractSubvectorOverhead(Tp, Index, SubTp); |
627 | case TTI::SK_InsertSubvector: |
628 | return getInsertSubvectorOverhead(Tp, Index, SubTp); |
629 | } |
630 | llvm_unreachable("Unknown TTI::ShuffleKind"); |
631 | } |
632 | |
633 | unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, |
634 | const Instruction *I = nullptr) { |
635 | const TargetLoweringBase *TLI = getTLI(); |
636 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
637 | assert(ISD && "Invalid opcode"); |
638 | std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src); |
639 | std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst); |
640 | |
641 | // Check for NOOP conversions. |
642 | if (SrcLT.first == DstLT.first && |
643 | SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { |
644 | |
645 | // Bitcasts between types that are legalized to the same type are free. |
646 | if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc) |
647 | return 0; |
648 | } |
649 | |
650 | if (Opcode == Instruction::Trunc && |
651 | TLI->isTruncateFree(SrcLT.second, DstLT.second)) |
652 | return 0; |
653 | |
654 | if (Opcode == Instruction::ZExt && |
655 | TLI->isZExtFree(SrcLT.second, DstLT.second)) |
656 | return 0; |
657 | |
658 | if (Opcode == Instruction::AddrSpaceCast && |
659 | TLI->isNoopAddrSpaceCast(Src->getPointerAddressSpace(), |
660 | Dst->getPointerAddressSpace())) |
661 | return 0; |
662 | |
663 | // If this is a zext/sext of a load, return 0 if the corresponding |
664 | // extending load exists on target. |
665 | if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && |
666 | I && isa<LoadInst>(I->getOperand(0))) { |
667 | EVT ExtVT = EVT::getEVT(Dst); |
668 | EVT LoadVT = EVT::getEVT(Src); |
669 | unsigned LType = |
670 | ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD); |
671 | if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT)) |
672 | return 0; |
673 | } |
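| // For example (assuming the target reports ZEXTLOAD of i16 into i32 as |
| // legal, as most do via a widening load instruction): a zext of a |
| // load's i16 result to i32 folds into the load itself and is costed 0 |
| // here. |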
674 | |
675 | // If the cast is marked as legal (or promote) then assume low cost. |
676 | if (SrcLT.first == DstLT.first && |
677 | TLI->isOperationLegalOrPromote(ISD, DstLT.second)) |
678 | return 1; |
679 | |
680 | // Handle scalar conversions. |
681 | if (!Src->isVectorTy() && !Dst->isVectorTy()) { |
682 | // Scalar bitcasts are usually free. |
683 | if (Opcode == Instruction::BitCast) |
684 | return 0; |
685 | |
686 | // Just check the op cost. If the operation is legal then assume it costs |
687 | // 1. |
688 | if (!TLI->isOperationExpand(ISD, DstLT.second)) |
689 | return 1; |
690 | |
691 | // Assume that illegal scalar instructions are expensive. |
692 | return 4; |
693 | } |
694 | |
695 | // Check vector-to-vector casts. |
696 | if (Dst->isVectorTy() && Src->isVectorTy()) { |
697 | // If the cast is between same-sized registers, then the check is simple. |
698 | if (SrcLT.first == DstLT.first && |
699 | SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { |
700 | |
701 | // Assume that Zext is done using AND. |
702 | if (Opcode == Instruction::ZExt) |
703 | return 1; |
704 | |
705 | // Assume that sext is done using SHL and SRA. |
706 | if (Opcode == Instruction::SExt) |
707 | return 2; |
708 | |
709 | // Just check the op cost. If the operation is legal then assume it |
710 | // costs 1 and multiply by the |
711 | // type-legalization overhead. |
712 | if (!TLI->isOperationExpand(ISD, DstLT.second)) |
713 | return SrcLT.first * 1; |
714 | } |
715 | |
716 | // If we are legalizing by splitting, query the concrete TTI for the cost |
717 | // of casting the original vector twice. We also need to factor in the |
718 | // cost of the split itself. Count that as 1, to be consistent with |
719 | // TLI->getTypeLegalizationCost(). |
720 | if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == |
721 | TargetLowering::TypeSplitVector) || |
722 | (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) == |
723 | TargetLowering::TypeSplitVector)) { |
724 | Type *SplitDst = VectorType::get(Dst->getVectorElementType(), |
725 | Dst->getVectorNumElements() / 2); |
726 | Type *SplitSrc = VectorType::get(Src->getVectorElementType(), |
727 | Src->getVectorNumElements() / 2); |
728 | T *TTI = static_cast<T *>(this); |
729 | return TTI->getVectorSplitCost() + |
730 | (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I)); |
731 | } |
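| // Worked example (hypothetical 128-bit target, for illustration): an |
| // FPTrunc from <8 x float> to <8 x half> whose source type is marked |
| // TypeSplitVector is costed as getVectorSplitCost() plus twice the cost |
| // of the same cast on the <4 x float> halves. |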
732 | |
733 | // In other cases where the source or destination are illegal, assume |
734 | // the operation will get scalarized. |
735 | unsigned Num = Dst->getVectorNumElements(); |
736 | unsigned Cost = static_cast<T *>(this)->getCastInstrCost( |
737 | Opcode, Dst->getScalarType(), Src->getScalarType(), I); |
738 | |
739 | // Return the cost of multiple scalar invocations plus the cost of |
740 | // inserting and extracting the values. |
741 | return getScalarizationOverhead(Dst, true, true) + Num * Cost; |
742 | } |
743 | |
744 | // We already handled vector-to-vector and scalar-to-scalar |
745 | // conversions. This is where we handle bitcasts between vectors and |
746 | // scalars. We need to assume that the conversion is scalarized in one |
747 | // way or another. |
748 | if (Opcode == Instruction::BitCast) |
749 | // Illegal bitcasts are done by storing and loading from a stack slot. |
750 | return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true) |
751 | : 0) + |
752 | (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false) |
753 | : 0); |
754 | |
755 | llvm_unreachable("Unhandled cast"); |
756 | } |
757 | |
758 | unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, |
759 | VectorType *VecTy, unsigned Index) { |
760 | return static_cast<T *>(this)->getVectorInstrCost( |
761 | Instruction::ExtractElement, VecTy, Index) + |
762 | static_cast<T *>(this)->getCastInstrCost(Opcode, Dst, |
763 | VecTy->getElementType()); |
764 | } |
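| // E.g. (a sketch, not a guaranteed lowering): extracting element 0 of a |
| // <4 x i16> vector and sign-extending it to i32 is modeled as one |
| // ExtractElement cost plus one SExt cast cost; targets with a combined |
| // extract-and-extend instruction should override this. |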
765 | |
766 | unsigned getCFInstrCost(unsigned Opcode) { |
767 | // Branches are assumed to be predicted. |
768 | return 0; |
769 | } |
770 | |
771 | unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, |
772 | const Instruction *I) { |
773 | const TargetLoweringBase *TLI = getTLI(); |
774 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
775 | assert(ISD && "Invalid opcode"); |
776 | |
777 | // Selects on vectors are actually vector selects. |
778 | if (ISD == ISD::SELECT) { |
779 | assert(CondTy && "CondTy must exist"); |
780 | if (CondTy->isVectorTy()) |
781 | ISD = ISD::VSELECT; |
782 | } |
783 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); |
784 | |
785 | if (!(ValTy->isVectorTy() && !LT.second.isVector()) && |
786 | !TLI->isOperationExpand(ISD, LT.second)) { |
787 | // The operation is legal. Assume it costs 1. Multiply |
788 | // by the type-legalization overhead. |
789 | return LT.first * 1; |
790 | } |
791 | |
792 | // Otherwise, assume that the operation is scalarized. |
793 | // TODO: If one of the types get legalized by splitting, handle this |
794 | // similarly to what getCastInstrCost() does. |
795 | if (ValTy->isVectorTy()) { |
796 | unsigned Num = ValTy->getVectorNumElements(); |
797 | if (CondTy) |
798 | CondTy = CondTy->getScalarType(); |
799 | unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost( |
800 | Opcode, ValTy->getScalarType(), CondTy, I); |
801 | |
802 | // Return the cost of multiple scalar invocations plus the cost of |
803 | // inserting and extracting the values. |
804 | return getScalarizationOverhead(ValTy, true, false) + Num * Cost; |
805 | } |
806 | |
807 | // Unknown scalar opcode. |
808 | return 1; |
809 | } |
810 | |
811 | unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { |
812 | std::pair<unsigned, MVT> LT = |
813 | getTLI()->getTypeLegalizationCost(DL, Val->getScalarType()); |
814 | |
815 | return LT.first; |
816 | } |
817 | |
818 | unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, |
819 | unsigned AddressSpace, const Instruction *I = nullptr) { |
820 | assert(!Src->isVoidTy() && "Invalid type"); |
821 | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src); |
822 | |
823 | // Assume that all loads and stores of legal types cost 1. |
824 | unsigned Cost = LT.first; |
825 | |
826 | if (Src->isVectorTy() && |
827 | Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) { |
828 | // This is a vector load that legalizes to a larger type than the vector |
829 | // itself. Unless the corresponding extending load or truncating store |
830 | // is legal, this will scalarize. |
831 | TargetLowering::LegalizeAction LA = TargetLowering::Expand; |
832 | EVT MemVT = getTLI()->getValueType(DL, Src); |
833 | if (Opcode == Instruction::Store) |
834 | LA = getTLI()->getTruncStoreAction(LT.second, MemVT); |
835 | else |
836 | LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT); |
837 | |
838 | if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) { |
839 | // This is a vector load/store for some illegal type that is scalarized. |
840 | // We must account for the cost of building or decomposing the vector. |
841 | Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store, |
842 | Opcode == Instruction::Store); |
843 | } |
844 | } |
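| // Worked example (hypothetical target, for illustration): storing a |
| // <4 x i8> that legalizes to v4i32 without a legal truncating store is |
| // costed as the base store cost plus the overhead of extracting all |
| // four elements. |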
845 | |
846 | return Cost; |
847 | } |
848 | |
849 | unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, |
850 | unsigned Factor, |
851 | ArrayRef<unsigned> Indices, |
852 | unsigned Alignment, unsigned AddressSpace, |
853 | bool UseMaskForCond = false, |
854 | bool UseMaskForGaps = false) { |
855 | VectorType *VT = dyn_cast<VectorType>(VecTy); |
856 | assert(VT && "Expect a vector type for interleaved memory op"); |
857 | |
858 | unsigned NumElts = VT->getNumElements(); |
859 | assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor"); |
860 | |
861 | unsigned NumSubElts = NumElts / Factor; |
862 | VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts); |
863 | |
864 | // First, the cost of the load/store operation. |
865 | unsigned Cost; |
866 | if (UseMaskForCond || UseMaskForGaps) |
867 | Cost = static_cast<T *>(this)->getMaskedMemoryOpCost( |
868 | Opcode, VecTy, Alignment, AddressSpace); |
869 | else |
870 | Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment, |
871 | AddressSpace); |
872 | |
873 | // Legalize the vector type, and get the legalized and unlegalized type |
874 | // sizes. |
875 | MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second; |
876 | unsigned VecTySize = |
877 | static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy); |
878 | unsigned VecTyLTSize = VecTyLT.getStoreSize(); |
879 | |
880 | // Return the ceiling of dividing A by B. |
881 | auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; }; |
882 | |
883 | // Scale the cost of the memory operation by the fraction of legalized |
884 | // instructions that will actually be used. We shouldn't account for the |
885 | // cost of dead instructions since they will be removed. |
886 | // |
887 | // E.g., An interleaved load of factor 8: |
888 | // %vec = load <16 x i64>, <16 x i64>* %ptr |
889 | // %v0 = shufflevector %vec, undef, <0, 8> |
890 | // |
891 | // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be |
892 | // used (those corresponding to elements [0:1] and [8:9] of the unlegalized |
893 | // type). The other loads are unused. |
894 | // |
895 | // We only scale the cost of loads since interleaved store groups aren't |
896 | // allowed to have gaps. |
897 | if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) { |
898 | // The number of loads of a legal type it will take to represent a load |
899 | // of the unlegalized vector type. |
900 | unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize); |
901 | |
902 | // The number of elements of the unlegalized type that correspond to a |
903 | // single legal instruction. |
904 | unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts); |
905 | |
906 | // Determine which legal instructions will be used. |
907 | BitVector UsedInsts(NumLegalInsts, false); |
908 | for (unsigned Index : Indices) |
909 | for (unsigned Elt = 0; Elt < NumSubElts; ++Elt) |
910 | UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst); |
911 | |
912 | // Scale the cost of the load by the fraction of legal instructions that |
913 | // will be used, rounding up so a partially used load never becomes free. |
914 | Cost = ceil(Cost * UsedInsts.count(), NumLegalInsts); |
915 | } |
916 | |
917 | // Then add the cost of the interleave operation. |
918 | if (Opcode == Instruction::Load) { |
919 | // The interleave cost is similar to extracting the sub-vectors' elements |
920 | // from the wide vector and inserting them into the sub-vectors. |
921 | // |
922 | // E.g. An interleaved load of factor 2 (with one member of index 0): |
923 | // %vec = load <8 x i32>, <8 x i32>* %ptr |
924 | // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0 |
925 | // The cost is estimated as extract elements at 0, 2, 4, 6 from the |
926 | // <8 x i32> vector and insert them into a <4 x i32> vector. |
927 | |
928 | assert(Indices.size() <= Factor && |
929 | "Interleaved memory op has too many members"); |
930 | |
931 | for (unsigned Index : Indices) { |
932 | assert(Index < Factor && "Invalid index for interleaved memory op"); |
933 | |
934 | // Extract elements from loaded vector for each sub vector. |
935 | for (unsigned i = 0; i < NumSubElts; i++) |
936 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
937 | Instruction::ExtractElement, VT, Index + i * Factor); |
938 | } |
939 | |
940 | unsigned InsSubCost = 0; |
941 | for (unsigned i = 0; i < NumSubElts; i++) |
942 | InsSubCost += static_cast<T *>(this)->getVectorInstrCost( |
943 | Instruction::InsertElement, SubVT, i); |
944 | |
945 | Cost += Indices.size() * InsSubCost; |
946 | } else { |
947 | // The interleave cost is to extract all elements from the sub-vectors |
948 | // and insert them into the wide vector. |
949 | // |
950 | // E.g. An interleaved store of factor 2: |
951 | // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7> |
952 | // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr |
953 | // The cost is estimated as extract all elements from both <4 x i32> |
954 | // vectors and insert into the <8 x i32> vector. |
955 | |
956 | unsigned ExtSubCost = 0; |
957 | for (unsigned i = 0; i < NumSubElts; i++) |
958 | ExtSubCost += static_cast<T *>(this)->getVectorInstrCost( |
959 | Instruction::ExtractElement, SubVT, i); |
960 | Cost += ExtSubCost * Factor; |
961 | |
962 | for (unsigned i = 0; i < NumElts; i++) |
963 | Cost += static_cast<T *>(this) |
964 | ->getVectorInstrCost(Instruction::InsertElement, VT, i); |
965 | } |
966 | |
967 | if (!UseMaskForCond) |
968 | return Cost; |
969 | |
970 | Type *I8Type = Type::getInt8Ty(VT->getContext()); |
971 | VectorType *MaskVT = VectorType::get(I8Type, NumElts); |
972 | SubVT = VectorType::get(I8Type, NumSubElts); |
973 | |
974 | // The mask shuffling cost is to extract all the elements of the mask |
975 | // and insert each of them Factor times into the wide vector: |
976 | // |
977 | // E.g. an interleaved group with factor 3: |
978 | // %mask = icmp ult <8 x i32> %vec1, %vec2 |
979 | // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef, |
980 | // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7> |
981 | // The cost is estimated as extracting all mask elements from the <8xi1> |
982 | // mask vector and inserting them Factor times into the <24xi1> shuffled |
983 | // mask vector. |
984 | for (unsigned i = 0; i < NumSubElts; i++) |
985 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
986 | Instruction::ExtractElement, SubVT, i); |
987 | |
988 | for (unsigned i = 0; i < NumElts; i++) |
989 | Cost += static_cast<T *>(this)->getVectorInstrCost( |
990 | Instruction::InsertElement, MaskVT, i); |
991 | |
992 | // The Gaps mask is invariant and created outside the loop, therefore the |
993 | // cost of creating it is not accounted for here. However if we have both |
994 | // a MaskForGaps and some other mask that guards the execution of the |
995 | // memory access, we need to account for the cost of And-ing the two masks |
996 | // inside the loop. |
997 | if (UseMaskForGaps) |
998 | Cost += static_cast<T *>(this)->getArithmeticInstrCost( |
999 | BinaryOperator::And, MaskVT); |
1000 | |
1001 | return Cost; |
1002 | } |
1003 | |
1004 | /// Get intrinsic cost based on arguments. |
1005 | unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, |
1006 | ArrayRef<Value *> Args, FastMathFlags FMF, |
1007 | unsigned VF = 1) { |
1008 | unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1); |
1009 | assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type"); |
1010 | auto *ConcreteTTI = static_cast<T *>(this); |
1011 | |
1012 | switch (IID) { |
1013 | default: { |
1014 | // Assume that we need to scalarize this intrinsic. |
1015 | SmallVector<Type *, 4> Types; |
1016 | for (Value *Op : Args) { |
1017 | Type *OpTy = Op->getType(); |
1018 | assert(VF == 1 || !OpTy->isVectorTy()); |
1019 | Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF)); |
1020 | } |
1021 | |
1022 | if (VF > 1 && !RetTy->isVoidTy()) |
1023 | RetTy = VectorType::get(RetTy, VF); |
1024 | |
1025 | // Compute the scalarization overhead based on Args for a vector |
1026 | // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while |
1027 | // CostModel will pass a vector RetTy and VF is 1. |
1028 | unsigned ScalarizationCost = std::numeric_limits<unsigned>::max(); |
1029 | if (RetVF > 1 || VF > 1) { |
1030 | ScalarizationCost = 0; |
1031 | if (!RetTy->isVoidTy()) |
1032 | ScalarizationCost += getScalarizationOverhead(RetTy, true, false); |
1033 | ScalarizationCost += getOperandsScalarizationOverhead(Args, VF); |
1034 | } |
1035 | |
1036 | return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF, |
1037 | ScalarizationCost); |
1038 | } |
1039 | case Intrinsic::masked_scatter: { |
1040 | assert(VF == 1 && "Can't vectorize types here."); |
1041 | Value *Mask = Args[3]; |
1042 | bool VarMask = !isa<Constant>(Mask); |
1043 | unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue(); |
1044 | return ConcreteTTI->getGatherScatterOpCost( |
1045 | Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment); |
1046 | } |
1047 | case Intrinsic::masked_gather: { |
1048 | assert(VF == 1 && "Can't vectorize types here."); |
1049 | Value *Mask = Args[2]; |
1050 | bool VarMask = !isa<Constant>(Mask); |
1051 | unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue(); |
1052 | return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy, |
1053 | Args[0], VarMask, Alignment); |
1054 | } |
1055 | case Intrinsic::experimental_vector_reduce_add: |
1056 | case Intrinsic::experimental_vector_reduce_mul: |
1057 | case Intrinsic::experimental_vector_reduce_and: |
1058 | case Intrinsic::experimental_vector_reduce_or: |
1059 | case Intrinsic::experimental_vector_reduce_xor: |
1060 | case Intrinsic::experimental_vector_reduce_fadd: |
1061 | case Intrinsic::experimental_vector_reduce_fmul: |
1062 | case Intrinsic::experimental_vector_reduce_smax: |
1063 | case Intrinsic::experimental_vector_reduce_smin: |
1064 | case Intrinsic::experimental_vector_reduce_fmax: |
1065 | case Intrinsic::experimental_vector_reduce_fmin: |
1066 | case Intrinsic::experimental_vector_reduce_umax: |
1067 | case Intrinsic::experimental_vector_reduce_umin: |
1068 | return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF); |
1069 | case Intrinsic::fshl: |
1070 | case Intrinsic::fshr: { |
1071 | Value *X = Args[0]; |
1072 | Value *Y = Args[1]; |
1073 | Value *Z = Args[2]; |
1074 | TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW; |
1075 | TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX); |
1076 | TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY); |
1077 | TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ); |
1078 | TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue; |
1079 | OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2 |
1080 | : TTI::OP_None; |
1081 | // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) |
1082 | // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) |
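| // For example (an illustrative decomposition only): fshl on i32 with a |
| // variable shift amount %z is costed as one Or, one Sub, one Shl, one |
| // LShr, and one URem for the %z % 32; when X == Y (a rotate), the |
| // icmp/select pair added below for shift-by-zero handling is skipped. |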
1083 | unsigned Cost = 0; |
1084 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy); |
1085 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy); |
1086 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy, |
1087 | OpKindX, OpKindZ, OpPropsX); |
1088 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy, |
1089 | OpKindY, OpKindZ, OpPropsY); |
1090 | // Non-constant shift amounts require a modulo. |
1091 | if (OpKindZ != TTI::OK_UniformConstantValue && |
1092 | OpKindZ != TTI::OK_NonUniformConstantValue) |
1093 | Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy, |
1094 | OpKindZ, OpKindBW, OpPropsZ, |
1095 | OpPropsBW); |
1096 | // For non-rotates (X != Y) we must add shift-by-zero handling costs. |
1097 | if (X != Y) { |
1098 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); |
1099 | if (RetVF > 1) |
1100 | CondTy = VectorType::get(CondTy, RetVF); |
1101 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, |
1102 | CondTy, nullptr); |
1103 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, |
1104 | CondTy, nullptr); |
1105 | } |
1106 | return Cost; |
1107 | } |
1108 | } |
1109 | } |
1110 | |
1111 | /// Get intrinsic cost based on argument types. |
1112 | /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the |
1113 | /// cost of scalarizing the arguments and the return value will be computed |
1114 | /// based on types. |
1115 | unsigned getIntrinsicInstrCost( |
1116 | Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF, |
1117 | unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) { |
1118 | unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1); |
1119 | auto *ConcreteTTI = static_cast<T *>(this); |
1120 | |
1121 | SmallVector<unsigned, 2> ISDs; |
1122 | unsigned SingleCallCost = 10; // Library call cost. Make it expensive. |
1123 | switch (IID) { |
1124 | default: { |
1125 | // Assume that we need to scalarize this intrinsic. |
1126 | unsigned ScalarizationCost = ScalarizationCostPassed; |
1127 | unsigned ScalarCalls = 1; |
1128 | Type *ScalarRetTy = RetTy; |
1129 | if (RetTy->isVectorTy()) { |
1130 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) |
1131 | ScalarizationCost = getScalarizationOverhead(RetTy, true, false); |
1132 | ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); |
1133 | ScalarRetTy = RetTy->getScalarType(); |
1134 | } |
1135 | SmallVector<Type *, 4> ScalarTys; |
1136 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { |
1137 | Type *Ty = Tys[i]; |
1138 | if (Ty->isVectorTy()) { |
1139 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) |
1140 | ScalarizationCost += getScalarizationOverhead(Ty, false, true); |
1141 | ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements()); |
1142 | Ty = Ty->getScalarType(); |
1143 | } |
1144 | ScalarTys.push_back(Ty); |
1145 | } |
1146 | if (ScalarCalls == 1) |
1147 | return 1; // Return cost of a scalar intrinsic. Assume it to be cheap. |
1148 | |
1149 | unsigned ScalarCost = |
1150 | ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF); |
1151 | |
1152 | return ScalarCalls * ScalarCost + ScalarizationCost; |
1153 | } |
1154 | // Look for intrinsics that can be lowered directly or turned into a scalar |
1155 | // intrinsic call. |
1156 | case Intrinsic::sqrt: |
1157 | ISDs.push_back(ISD::FSQRT); |
1158 | break; |
1159 | case Intrinsic::sin: |
1160 | ISDs.push_back(ISD::FSIN); |
1161 | break; |
1162 | case Intrinsic::cos: |
1163 | ISDs.push_back(ISD::FCOS); |
1164 | break; |
1165 | case Intrinsic::exp: |
1166 | ISDs.push_back(ISD::FEXP); |
1167 | break; |
1168 | case Intrinsic::exp2: |
1169 | ISDs.push_back(ISD::FEXP2); |
1170 | break; |
1171 | case Intrinsic::log: |
1172 | ISDs.push_back(ISD::FLOG); |
1173 | break; |
1174 | case Intrinsic::log10: |
1175 | ISDs.push_back(ISD::FLOG10); |
1176 | break; |
1177 | case Intrinsic::log2: |
1178 | ISDs.push_back(ISD::FLOG2); |
1179 | break; |
1180 | case Intrinsic::fabs: |
1181 | ISDs.push_back(ISD::FABS); |
1182 | break; |
1183 | case Intrinsic::canonicalize: |
1184 | ISDs.push_back(ISD::FCANONICALIZE); |
1185 | break; |
1186 | case Intrinsic::minnum: |
1187 | ISDs.push_back(ISD::FMINNUM); |
1188 | if (FMF.noNaNs()) |
1189 | ISDs.push_back(ISD::FMINIMUM); |
1190 | break; |
1191 | case Intrinsic::maxnum: |
1192 | ISDs.push_back(ISD::FMAXNUM); |
1193 | if (FMF.noNaNs()) |
1194 | ISDs.push_back(ISD::FMAXIMUM); |
1195 | break; |
1196 | case Intrinsic::copysign: |
1197 | ISDs.push_back(ISD::FCOPYSIGN); |
1198 | break; |
1199 | case Intrinsic::floor: |
1200 | ISDs.push_back(ISD::FFLOOR); |
1201 | break; |
1202 | case Intrinsic::ceil: |
1203 | ISDs.push_back(ISD::FCEIL); |
1204 | break; |
1205 | case Intrinsic::trunc: |
1206 | ISDs.push_back(ISD::FTRUNC); |
1207 | break; |
1208 | case Intrinsic::nearbyint: |
1209 | ISDs.push_back(ISD::FNEARBYINT); |
1210 | break; |
1211 | case Intrinsic::rint: |
1212 | ISDs.push_back(ISD::FRINT); |
1213 | break; |
1214 | case Intrinsic::round: |
1215 | ISDs.push_back(ISD::FROUND); |
1216 | break; |
1217 | case Intrinsic::pow: |
1218 | ISDs.push_back(ISD::FPOW); |
1219 | break; |
1220 | case Intrinsic::fma: |
1221 | ISDs.push_back(ISD::FMA); |
1222 | break; |
1223 | case Intrinsic::fmuladd: |
1224 | ISDs.push_back(ISD::FMA); |
1225 | break; |
1226 | // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free. |
1227 | case Intrinsic::lifetime_start: |
1228 | case Intrinsic::lifetime_end: |
1229 | case Intrinsic::sideeffect: |
1230 | return 0; |
1231 | case Intrinsic::masked_store: |
1232 | return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, |
1233 | 0); |
1234 | case Intrinsic::masked_load: |
1235 | return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0); |
1236 | case Intrinsic::experimental_vector_reduce_add: |
1237 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0], |
1238 | /*IsPairwiseForm=*/false); |
1239 | case Intrinsic::experimental_vector_reduce_mul: |
1240 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0], |
1241 | /*IsPairwiseForm=*/false); |
1242 | case Intrinsic::experimental_vector_reduce_and: |
1243 | return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0], |
1244 | /*IsPairwiseForm=*/false); |
1245 | case Intrinsic::experimental_vector_reduce_or: |
1246 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0], |
1247 | /*IsPairwiseForm=*/false); |
1248 | case Intrinsic::experimental_vector_reduce_xor: |
1249 | return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0], |
1250 | /*IsPairwiseForm=*/false); |
1251 | case Intrinsic::experimental_vector_reduce_fadd: |
1252 | return ConcreteTTI->getArithmeticReductionCost(Instruction::FAdd, Tys[0], |
1253 | /*IsPairwiseForm=*/false); |
1254 | case Intrinsic::experimental_vector_reduce_fmul: |
1255 | return ConcreteTTI->getArithmeticReductionCost(Instruction::FMul, Tys[0], |
1256 | /*IsPairwiseForm=*/false); |
1257 | case Intrinsic::experimental_vector_reduce_smax: |
1258 | case Intrinsic::experimental_vector_reduce_smin: |
1259 | case Intrinsic::experimental_vector_reduce_fmax: |
1260 | case Intrinsic::experimental_vector_reduce_fmin: |
1261 | return ConcreteTTI->getMinMaxReductionCost( |
1262 | Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, |
1263 | /*IsSigned=*/true); |
1264 | case Intrinsic::experimental_vector_reduce_umax: |
1265 | case Intrinsic::experimental_vector_reduce_umin: |
1266 | return ConcreteTTI->getMinMaxReductionCost( |
1267 | Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, |
1268 | /*IsSigned=*/false); |
1269 | case Intrinsic::sadd_sat: |
1270 | case Intrinsic::ssub_sat: { |
1271 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); |
1272 | if (RetVF > 1) |
1273 | CondTy = VectorType::get(CondTy, RetVF); |
1274 | |
1275 | Type *OpTy = StructType::create({RetTy, CondTy}); |
1276 | Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat |
1277 | ? Intrinsic::sadd_with_overflow |
1278 | : Intrinsic::ssub_with_overflow; |
1279 | |
1280 | // SatMax -> Overflow && SumDiff < 0 |
1281 | // SatMin -> Overflow && SumDiff >= 0 |
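| // Concrete case (for illustration): sadd.sat on i8 with 100 + 50 |
| // overflows and wraps to -106; SumDiff < 0 selects SatMax, i.e. 127. |
| // The cost below therefore models one overflow intrinsic, one icmp and |
| // two selects. |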
1282 | unsigned Cost = 0; |
1283 | Cost += ConcreteTTI->getIntrinsicInstrCost( |
1284 | OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); |
1285 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, |
1286 | CondTy, nullptr); |
1287 | Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, |
1288 | CondTy, nullptr); |
1289 | return Cost; |
1290 | } |
1291 | case Intrinsic::uadd_sat: |
1292 | case Intrinsic::usub_sat: { |
1293 | Type *CondTy = Type::getInt1Ty(RetTy->getContext()); |
1294 | if (RetVF > 1) |
1295 | CondTy = VectorType::get(CondTy, RetVF); |
1296 | |
1297 | Type *OpTy = StructType::create({RetTy, CondTy}); |
1298 | Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat |
1299 | ? Intrinsic::uadd_with_overflow |
1300 | : Intrinsic::usub_with_overflow; |
1301 | |
1302 | unsigned Cost = 0; |
1303 | Cost += ConcreteTTI->getIntrinsicInstrCost( |
1304 | OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); |
1305 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, |
1306 | CondTy, nullptr); |
1307 | return Cost; |
1308 | } |
1309 | case Intrinsic::smul_fix: |
1310 | case Intrinsic::umul_fix: { |
1311 | unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; |
1312 | Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); |
1313 | if (RetVF > 1) |
1314 | ExtTy = VectorType::get(ExtTy, RetVF); |
1315 | |
1316 | unsigned ExtOp = |
1317 | IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; |
1318 | |
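| // The expansion modeled here (a sketch of the generic lowering, not a |
| // target-specific sequence): extend both operands to 2*BW bits, do one |
| // wide multiply, truncate twice, and stitch the scaled result back |
| // together with the LShr/Shl/Or counted below. |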
1319 | unsigned Cost = 0; |
1320 | Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy); |
1321 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); |
1322 | Cost += |
1323 | 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy); |
1324 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy, |
1325 | TTI::OK_AnyValue, |
1326 | TTI::OK_UniformConstantValue); |
1327 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy, |
1328 | TTI::OK_AnyValue, |
1329 | TTI::OK_UniformConstantValue); |
1330 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy); |
1331 | return Cost; |
1332 | } |
1333 | case Intrinsic::sadd_with_overflow: |
1334 | case Intrinsic::ssub_with_overflow: { |
1335 | Type *SumTy = RetTy->getContainedType(0); |
1336 | Type *OverflowTy = RetTy->getContainedType(1); |
1337 | unsigned Opcode = IID == Intrinsic::sadd_with_overflow |
1338 | ? BinaryOperator::Add |
1339 | : BinaryOperator::Sub; |
1340 | |
1341 | // LHSSign -> LHS >= 0 |
1342 | // RHSSign -> RHS >= 0 |
1343 | // SumSign -> Sum >= 0 |
1344 | // |
1345 | // Add: |
1346 | // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) |
1347 | // Sub: |
1348 | // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) |
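| // E.g. (for illustration): sadd.with.overflow on i8 with 100 + 50: both |
| // operands are non-negative (LHSSign == RHSSign) while the wrapped sum |
| // -106 is negative (LHSSign != SumSign), so Overflow is true. |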
1349 | unsigned Cost = 0; |
1350 | Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); |
1351 | Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, |
1352 | OverflowTy, nullptr); |
1353 | Cost += 2 * ConcreteTTI->getCmpSelInstrCost( |
1354 | BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr); |
1355 | Cost += |
1356 | ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy); |
1357 | return Cost; |
1358 | } |
1359 | case Intrinsic::uadd_with_overflow: |
1360 | case Intrinsic::usub_with_overflow: { |
1361 | Type *SumTy = RetTy->getContainedType(0); |
1362 | Type *OverflowTy = RetTy->getContainedType(1); |
1363 | unsigned Opcode = IID == Intrinsic::uadd_with_overflow |
1364 | ? BinaryOperator::Add |
1365 | : BinaryOperator::Sub; |
1366 | |
1367 | unsigned Cost = 0; |
1368 | Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy); |
1369 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, |
1370 | OverflowTy, nullptr); |
1371 | return Cost; |
1372 | } |
1373 | case Intrinsic::smul_with_overflow: |
1374 | case Intrinsic::umul_with_overflow: { |
1375 | Type *MulTy = RetTy->getContainedType(0); |
1376 | Type *OverflowTy = RetTy->getContainedType(1); |
1377 | unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; |
1378 | Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); |
1379 | if (MulTy->isVectorTy()) |
1380 | ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements()); |
1381 | |
1382 | unsigned ExtOp = IID == Intrinsic::smul_with_overflow |
1383 | ? Instruction::SExt : Instruction::ZExt; |
1384 | |
1385 | unsigned Cost = 0; |
1386 | Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy); |
1387 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy); |
1388 | Cost += |
1389 | 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy); |
1390 | Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy, |
1391 | TTI::OK_AnyValue, |
1392 | TTI::OK_UniformConstantValue); |
1393 | |
1394 | if (IID == Intrinsic::smul_with_overflow) |
1395 | Cost += ConcreteTTI->getArithmeticInstrCost( |
1396 | Instruction::AShr, MulTy, TTI::OK_AnyValue, |
1397 | TTI::OK_UniformConstantValue); |
1398 | |
1399 | Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, |
1400 | OverflowTy, nullptr); |
1401 | return Cost; |
1402 | } |
1403 | case Intrinsic::ctpop: |
1404 | ISDs.push_back(ISD::CTPOP); |
1405 | // In case of legalization use TCC_Expensive. This is cheaper than a |
1406 | // library call but still not a cheap instruction. |
1407 | SingleCallCost = TargetTransformInfo::TCC_Expensive; |
1408 | break; |
1409 | // FIXME: ctlz, cttz, ... |
1410 | } |
1411 | |
1412 | const TargetLoweringBase *TLI = getTLI(); |
1413 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy); |
1414 | |
1415 | SmallVector<unsigned, 2> LegalCost; |
1416 | SmallVector<unsigned, 2> CustomCost; |
1417 | for (unsigned ISD : ISDs) { |
1418 | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { |
1419 | if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() && |
1420 | TLI->isFAbsFree(LT.second)) { |
1421 | return 0; |
1422 | } |
1423 | |
1424 | // The operation is legal. Assume it costs 1. |
1425 | // If the type is split to multiple registers, assume that there is some |
1426 | // overhead to this. |
1427 | // TODO: Once we have extract/insert subvector cost we need to use them. |
1428 | if (LT.first > 1) |
1429 | LegalCost.push_back(LT.first * 2); |
1430 | else |
1431 | LegalCost.push_back(LT.first * 1); |
1432 | } else if (!TLI->isOperationExpand(ISD, LT.second)) { |
1433 | // If the operation is custom lowered then assume |
1434 | // that the code is twice as expensive. |
1435 | CustomCost.push_back(LT.first * 2); |
1436 | } |
1437 | } |
1438 | |
1439 | auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end()); |
1440 | if (MinLegalCostI != LegalCost.end()) |
1441 | return *MinLegalCostI; |
1442 | |
1443 | auto MinCustomCostI = |
1444 | std::min_element(CustomCost.begin(), CustomCost.end()); |
1445 | if (MinCustomCostI != CustomCost.end()) |
1446 | return *MinCustomCostI; |
1447 | |
1448 | // If we can't lower fmuladd into an FMA estimate the cost as a floating |
1449 | // point mul followed by an add. |
1450 | if (IID == Intrinsic::fmuladd) |
1451 | return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) + |
1452 | ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy); |
1453 | |
1454 | // Else, assume that we need to scalarize this intrinsic. For math builtins |
1455 | // this will emit a costly libcall, adding call overhead and spills. Make it |
1456 | // very expensive. |
1457 | if (RetTy->isVectorTy()) { |
1458 | unsigned ScalarizationCost = |
1459 | ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max()) |
1460 | ? ScalarizationCostPassed |
1461 | : getScalarizationOverhead(RetTy, true, false)); |
1462 | unsigned ScalarCalls = RetTy->getVectorNumElements(); |
1463 | SmallVector<Type *, 4> ScalarTys; |
1464 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { |
1465 | Type *Ty = Tys[i]; |
1466 | if (Ty->isVectorTy()) |
1467 | Ty = Ty->getScalarType(); |
1468 | ScalarTys.push_back(Ty); |
1469 | } |
1470 | unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost( |
1471 | IID, RetTy->getScalarType(), ScalarTys, FMF); |
1472 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { |
1473 | if (Tys[i]->isVectorTy()) { |
1474 | if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) |
1475 | ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); |
1476 | ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements()); |
1477 | } |
1478 | } |
1479 | |
1480 | return ScalarCalls * ScalarCost + ScalarizationCost; |
1481 | } |
1482 | |
1483 | // This is going to be turned into a library call, make it expensive. |
1484 | return SingleCallCost; |
1485 | } |
1486 | |
1487 | /// Compute a cost of the given call instruction. |
1488 | /// |
1489 | /// Compute the cost of calling function F with return type RetTy and |
1490 | /// argument types Tys. F might be nullptr, in this case the cost of an |
1491 | /// arbitrary call with the specified signature will be returned. |
1492 | /// This is used, for instance, when we estimate call of a vector |
1493 | /// counterpart of the given function. |
1494 | /// \param F Called function, might be nullptr. |
1495 | /// \param RetTy Return value types. |
1496 | /// \param Tys Argument types. |
1497 | /// \returns The cost of Call instruction. |
1498 | unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) { |
1499 | return 10; |
1500 | } |
1501 | |
1502 | unsigned getNumberOfParts(Type *Tp) { |
1503 | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp); |
1504 | return LT.first; |
1505 | } |
1506 | |
1507 | unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *, |
1508 | const SCEV *) { |
1509 | return 0; |
1510 | } |
1511 | |
1512 | /// Try to calculate arithmetic and shuffle op costs for reduction operations. |
1513 | /// We're assuming that reduction operation are performing the following way: |
1514 | /// 1. Non-pairwise reduction |
1515 | /// %val1 = shufflevector<n x t> %val, <n x t> %undef, |
1516 | /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n-1, i32 undef, ..., i32 undef> |
1517 | /// \----------------v-------------/ \----------v------------/ |
1518 | /// n/2 elements n/2 elements |
1519 | /// %red1 = op <n x t> %val, <n x t> %val1 |
1520 | /// After this operation we have a vector %red1 where only the first n/2 |
1521 | /// elements are meaningful, the second n/2 elements are undefined and can be |
1522 | /// dropped. All other operations are actually working with the vector of |
1523 | /// length n/2, not n, though the real vector length is still n. |
1524 | /// %val2 = shufflevector<n x t> %red1, <n x t> %undef, |
1525 | /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2-1, i32 undef, ..., i32 undef> |
1526 | /// \----------------v-------------/ \----------v------------/ |
1527 | /// n/4 elements 3*n/4 elements |
1528 | /// %red2 = op <n x t> %red1, <n x t> %val2 - working with the vector of |
1529 | /// length n/2, the resulting vector has length n/4 etc. |
1530 | /// 2. Pairwise reduction: |
1531 | /// Everything is the same except for an additional shuffle operation which |
1532 | /// is used to produce operands for pairwise kind of reductions. |
1533 | /// %val1 = shufflevector<n x t> %val, <n x t> %undef, |
1534 | /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef> |
1535 | /// \-------------v----------/ \----------v------------/ |
1536 | /// n/2 elements n/2 elements |
1537 | /// %val2 = shufflevector<n x t> %val, <n x t> %undef, |
1538 | /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef> |
1539 | /// \-------------v----------/ \----------v------------/ |
1540 | /// n/2 elements n/2 elements |
1541 | /// %red1 = op <n x t> %val1, <n x t> %val2 |
1542 | /// Again, the operation is performed on <n x t> vector, but the resulting |
1543 | /// vector %red1 is <n/2 x t> vector. |
1544 | /// |
1545 | /// The cost model should take into account that the actual length of the |
1546 | /// vector is reduced on each iteration. |
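| /// Worked example (assuming unit shuffle and arithmetic costs, for |
| /// illustration): a non-pairwise add reduction of <8 x i32> on a target |
| /// whose widest legal type holds all 8 lanes needs log2(8) == 3 levels, |
| /// i.e. 3 shuffles and 3 adds, plus one final extractelement of lane 0. |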
1547 | unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty, |
1548 | bool IsPairwise) { |
1549 | assert(Ty->isVectorTy() && "Expect a vector type"); |
1550 | Type *ScalarTy = Ty->getVectorElementType(); |
1551 | unsigned NumVecElts = Ty->getVectorNumElements(); |
1552 | unsigned NumReduxLevels = Log2_32(NumVecElts); |
1553 | unsigned ArithCost = 0; |
1554 | unsigned ShuffleCost = 0; |
1555 | auto *ConcreteTTI = static_cast<T *>(this); |
1556 | std::pair<unsigned, MVT> LT = |
1557 | ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty); |
1558 | unsigned LongVectorCount = 0; |
1559 | unsigned MVTLen = |
1560 | LT.second.isVector() ? LT.second.getVectorNumElements() : 1; |
1561 | while (NumVecElts > MVTLen) { |
1562 | NumVecElts /= 2; |
1563 | Type *SubTy = VectorType::get(ScalarTy, NumVecElts); |
1564 | // Assume the pairwise shuffles add a cost. |
1565 | ShuffleCost += (IsPairwise + 1) * |
1566 | ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, |
1567 | NumVecElts, SubTy); |
1568 | ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy); |
1569 | Ty = SubTy; |
1570 | ++LongVectorCount; |
1571 | } |
1572 | |
1573 | NumReduxLevels -= LongVectorCount; |
1574 | |
1575 | // The minimal length of the vector is limited by the real length of vector |
1576 | // operations performed on the current platform. That's why several final |
1577 | // reduction operations are performed on the vectors with the same |
1578 | // architecture-dependent length. |
1579 | |
1580 | // Non-pairwise reductions need one shuffle per reduction level. Pairwise |
1581 | // reductions need two shuffles on every level but the last one. On that |
1582 | // level one of the shuffles is <0, u, u, ...>, which is the identity. |
1583 | unsigned NumShuffles = NumReduxLevels; |
1584 | if (IsPairwise && NumReduxLevels >= 1) |
1585 | NumShuffles += NumReduxLevels - 1; |
1586 | ShuffleCost += NumShuffles * |
1587 | ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, |
1588 | 0, Ty); |
1589 | ArithCost += NumReduxLevels * |
1590 | ConcreteTTI->getArithmeticInstrCost(Opcode, Ty); |
1591 | return ShuffleCost + ArithCost + |
1592 | ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); |
1593 | } |
1594 | |
1595 | /// Try to calculate op costs for min/max reduction operations. |
1596 | /// \param CondTy Conditional type for the Select instruction. |
1597 | unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise, |
1598 | bool) { |
1599 | assert(Ty->isVectorTy() && "Expect a vector type"); |
1600 | Type *ScalarTy = Ty->getVectorElementType(); |
1601 | Type *ScalarCondTy = CondTy->getVectorElementType(); |
1602 | unsigned NumVecElts = Ty->getVectorNumElements(); |
1603 | unsigned NumReduxLevels = Log2_32(NumVecElts); |
1604 | unsigned CmpOpcode; |
1605 | if (Ty->isFPOrFPVectorTy()) { |
1606 | CmpOpcode = Instruction::FCmp; |
1607 | } else { |
1608 | assert(Ty->isIntOrIntVectorTy() && |
1609 | "expecting floating point or integer type for min/max reduction"); |
1610 | CmpOpcode = Instruction::ICmp; |
1611 | } |
1612 | unsigned MinMaxCost = 0; |
1613 | unsigned ShuffleCost = 0; |
1614 | auto *ConcreteTTI = static_cast<T *>(this); |
1615 | std::pair<unsigned, MVT> LT = |
1616 | ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty); |
1617 | unsigned LongVectorCount = 0; |
1618 | unsigned MVTLen = |
1619 | LT.second.isVector() ? LT.second.getVectorNumElements() : 1; |
1620 | while (NumVecElts > MVTLen) { |
1621 | NumVecElts /= 2; |
1622 | Type *SubTy = VectorType::get(ScalarTy, NumVecElts); |
1623 | CondTy = VectorType::get(ScalarCondTy, NumVecElts); |
1624 | |
1625 | // Assume the pairwise shuffles add a cost. |
1626 | ShuffleCost += (IsPairwise + 1) * |
1627 | ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, |
1628 | NumVecElts, SubTy); |
1629 | MinMaxCost += |
1630 | ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) + |
1631 | ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy, |
1632 | nullptr); |
1633 | Ty = SubTy; |
1634 | ++LongVectorCount; |
1635 | } |
1636 | |
1637 | NumReduxLevels -= LongVectorCount; |
1638 | |
1639 |     // The minimal length of the vector is limited by the real length of the
1640 |     // vector operations supported by the current platform. That is why the
1641 |     // remaining reduction operations are performed on vectors of the same
1642 |     // architecture-dependent length.
1643 | |
1644 |     // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1645 |     // reductions need two shuffles on every level except the last one, where
1646 |     // one of the shuffles is <0, u, u, ...>, which is an identity.
1647 | unsigned NumShuffles = NumReduxLevels; |
1648 | if (IsPairwise && NumReduxLevels >= 1) |
1649 | NumShuffles += NumReduxLevels - 1; |
1650 | ShuffleCost += NumShuffles * |
1651 | ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, |
1652 | 0, Ty); |
1653 | MinMaxCost += |
1654 | NumReduxLevels * |
1655 | (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) + |
1656 | ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy, |
1657 | nullptr)); |
1658 | // The last min/max should be in vector registers and we counted it above. |
1659 | // So just need a single extractelement. |
1660 | return ShuffleCost + MinMaxCost + |
1661 | ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); |
1662 | } |
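  // Editorial sketch of the level arithmetic above, for a hypothetical
  // non-pairwise min-reduction of <8 x i32> on a target whose widest legal
  // vector holds 4 elements (MVTLen == 4):
  //
  //   NumVecElts = 8  ->  NumReduxLevels = Log2_32(8) = 3
  //   Split loop: 8 > 4, so one SK_ExtractSubvector shuffle plus one
  //   cmp/select pair on <4 x i32>; LongVectorCount becomes 1.
  //   Remaining levels: 3 - 1 = 2 SK_PermuteSingleSrc shuffles and two more
  //   cmp/select pairs on <4 x i32>, plus one final extractelement of lane 0.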
1663 | |
1664 | unsigned getVectorSplitCost() { return 1; } |
1665 | |
1666 | /// @} |
1667 | }; |
1668 | |
1669 | /// Concrete BasicTTIImpl that can be used if no further customization |
1670 | /// is needed. |
1671 | class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> { |
1672 | using BaseT = BasicTTIImplBase<BasicTTIImpl>; |
1673 | |
1674 | friend class BasicTTIImplBase<BasicTTIImpl>; |
1675 | |
1676 | const TargetSubtargetInfo *ST; |
1677 | const TargetLoweringBase *TLI; |
1678 | |
1679 | const TargetSubtargetInfo *getST() const { return ST; } |
1680 | const TargetLoweringBase *getTLI() const { return TLI; } |
1681 | |
1682 | public: |
1683 | explicit BasicTTIImpl(const TargetMachine *TM, const Function &F); |
1684 | }; |
1685 | |
1686 | } // end namespace llvm |
1687 | |
1688 | #endif // LLVM_CODEGEN_BASICTTIIMPL_H |
1 | //===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | /// | |||
9 | /// \file | |||
10 | /// This file describes how to lower LLVM code to machine code. This has three | |||
11 | /// main components: | |||
12 | /// | |||
13 | /// 1. Which ValueTypes are natively supported by the target. | |||
14 | /// 2. Which operations are supported for supported ValueTypes. | |||
15 | /// 3. Cost thresholds for alternative implementations of certain operations. | |||
16 | /// | |||
17 | /// In addition it has a few other components, like information about FP | |||
18 | /// immediates. | |||
19 | /// | |||
20 | //===----------------------------------------------------------------------===// | |||
21 | ||||
22 | #ifndef LLVM_CODEGEN_TARGETLOWERING_H | |||
23 | #define LLVM_CODEGEN_TARGETLOWERING_H | |||
24 | ||||
25 | #include "llvm/ADT/APInt.h" | |||
26 | #include "llvm/ADT/ArrayRef.h" | |||
27 | #include "llvm/ADT/DenseMap.h" | |||
28 | #include "llvm/ADT/STLExtras.h" | |||
29 | #include "llvm/ADT/SmallVector.h" | |||
30 | #include "llvm/ADT/StringRef.h" | |||
31 | #include "llvm/Analysis/LegacyDivergenceAnalysis.h" | |||
32 | #include "llvm/CodeGen/DAGCombine.h" | |||
33 | #include "llvm/CodeGen/ISDOpcodes.h" | |||
34 | #include "llvm/CodeGen/RuntimeLibcalls.h" | |||
35 | #include "llvm/CodeGen/SelectionDAG.h" | |||
36 | #include "llvm/CodeGen/SelectionDAGNodes.h" | |||
37 | #include "llvm/CodeGen/TargetCallingConv.h" | |||
38 | #include "llvm/CodeGen/ValueTypes.h" | |||
39 | #include "llvm/IR/Attributes.h" | |||
40 | #include "llvm/IR/CallSite.h" | |||
41 | #include "llvm/IR/CallingConv.h" | |||
42 | #include "llvm/IR/DataLayout.h" | |||
43 | #include "llvm/IR/DerivedTypes.h" | |||
44 | #include "llvm/IR/Function.h" | |||
45 | #include "llvm/IR/IRBuilder.h" | |||
46 | #include "llvm/IR/InlineAsm.h" | |||
47 | #include "llvm/IR/Instruction.h" | |||
48 | #include "llvm/IR/Instructions.h" | |||
49 | #include "llvm/IR/Type.h" | |||
50 | #include "llvm/MC/MCRegisterInfo.h" | |||
51 | #include "llvm/Support/AtomicOrdering.h" | |||
52 | #include "llvm/Support/Casting.h" | |||
53 | #include "llvm/Support/ErrorHandling.h" | |||
54 | #include "llvm/Support/MachineValueType.h" | |||
55 | #include "llvm/Target/TargetMachine.h" | |||
56 | #include <algorithm> | |||
57 | #include <cassert> | |||
58 | #include <climits> | |||
59 | #include <cstdint> | |||
60 | #include <iterator> | |||
61 | #include <map> | |||
62 | #include <string> | |||
63 | #include <utility> | |||
64 | #include <vector> | |||
65 | ||||
66 | namespace llvm { | |||
67 | ||||
68 | class BranchProbability; | |||
69 | class CCState; | |||
70 | class CCValAssign; | |||
71 | class Constant; | |||
72 | class FastISel; | |||
73 | class FunctionLoweringInfo; | |||
74 | class GlobalValue; | |||
75 | class IntrinsicInst; | |||
76 | struct KnownBits; | |||
77 | class LLVMContext; | |||
78 | class MachineBasicBlock; | |||
79 | class MachineFunction; | |||
80 | class MachineInstr; | |||
81 | class MachineJumpTableInfo; | |||
82 | class MachineLoop; | |||
83 | class MachineRegisterInfo; | |||
84 | class MCContext; | |||
85 | class MCExpr; | |||
86 | class Module; | |||
87 | class TargetRegisterClass; | |||
88 | class TargetLibraryInfo; | |||
89 | class TargetRegisterInfo; | |||
90 | class Value; | |||
91 | ||||
92 | namespace Sched { | |||
93 | ||||
94 | enum Preference { | |||
95 | None, // No preference | |||
96 | Source, // Follow source order. | |||
97 | RegPressure, // Scheduling for lowest register pressure. | |||
98 | Hybrid, // Scheduling for both latency and register pressure. | |||
99 | ILP, // Scheduling for ILP in low register pressure mode. | |||
100 | VLIW // Scheduling for VLIW targets. | |||
101 | }; | |||
102 | ||||
103 | } // end namespace Sched | |||
104 | ||||
105 | /// This base class for TargetLowering contains the SelectionDAG-independent | |||
106 | /// parts that can be used from the rest of CodeGen. | |||
107 | class TargetLoweringBase { | |||
108 | public: | |||
109 | /// This enum indicates whether operations are valid for a target, and if not, | |||
110 | /// what action should be used to make them valid. | |||
111 | enum LegalizeAction : uint8_t { | |||
112 | Legal, // The target natively supports this operation. | |||
113 | Promote, // This operation should be executed in a larger type. | |||
114 | Expand, // Try to expand this to other ops, otherwise use a libcall. | |||
115 | LibCall, // Don't try to expand this to other ops, always use a libcall. | |||
116 | Custom // Use the LowerOperation hook to implement custom lowering. | |||
117 | }; | |||
118 | ||||
119 | /// This enum indicates whether types are legal for a target, and if not, | |||
120 | /// what action should be used to make them valid. | |||
121 | enum LegalizeTypeAction : uint8_t { | |||
122 | TypeLegal, // The target natively supports this type. | |||
123 | TypePromoteInteger, // Replace this integer with a larger one. | |||
124 | TypeExpandInteger, // Split this integer into two of half the size. | |||
125 | TypeSoftenFloat, // Convert this float to a same size integer type, | |||
126 | // if an operation is not supported in target HW. | |||
127 | TypeExpandFloat, // Split this float into two of half the size. | |||
128 | TypeScalarizeVector, // Replace this one-element vector with its element. | |||
129 | TypeSplitVector, // Split this vector into two of half the size. | |||
130 | TypeWidenVector, // This vector should be widened into a larger vector. | |||
131 | TypePromoteFloat // Replace this float with a larger one. | |||
132 | }; | |||
133 | ||||
134 | /// LegalizeKind holds the legalization kind that needs to happen to EVT | |||
135 | /// in order to type-legalize it. | |||
136 | using LegalizeKind = std::pair<LegalizeTypeAction, EVT>; | |||
137 | ||||
138 | /// Enum that describes how the target represents true/false values. | |||
139 | enum BooleanContent { | |||
140 | UndefinedBooleanContent, // Only bit 0 counts, the rest can hold garbage. | |||
141 | ZeroOrOneBooleanContent, // All bits zero except for bit 0. | |||
142 | ZeroOrNegativeOneBooleanContent // All bits equal to bit 0. | |||
143 | }; | |||
144 | ||||
145 | /// Enum that describes what type of support for selects the target has. | |||
146 | enum SelectSupportKind { | |||
147 | ScalarValSelect, // The target supports scalar selects (ex: cmov). | |||
148 | ScalarCondVectorVal, // The target supports selects with a scalar condition | |||
149 | // and vector values (ex: cmov). | |||
150 | VectorMaskSelect // The target supports vector selects with a vector | |||
151 | // mask (ex: x86 blends). | |||
152 | }; | |||
153 | ||||
154 | /// Enum that specifies what an atomic load/AtomicRMWInst is expanded | |||
155 | /// to, if at all. Exists because different targets have different levels of | |||
156 | /// support for these atomic instructions, and also have different options | |||
157 | /// w.r.t. what they should expand to. | |||
158 | enum class AtomicExpansionKind { | |||
159 | None, // Don't expand the instruction. | |||
160 | LLSC, // Expand the instruction into load-linked/store-conditional; used | |||
161 | // by ARM/AArch64. | |||
162 | LLOnly, // Expand the (load) instruction into just a load-linked, which has | |||
163 | // greater atomic guarantees than a normal load. | |||
164 | CmpXChg, // Expand the instruction into cmpxchg; used by at least X86. | |||
165 | MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop. | |||
166 | }; | |||
167 | ||||
168 | /// Enum that specifies when a multiplication should be expanded. | |||
169 | enum class MulExpansionKind { | |||
170 | Always, // Always expand the instruction. | |||
171 | OnlyLegalOrCustom, // Only expand when the resulting instructions are legal | |||
172 | // or custom. | |||
173 | }; | |||
174 | ||||
175 | class ArgListEntry { | |||
176 | public: | |||
177 | Value *Val = nullptr; | |||
178 | SDValue Node = SDValue(); | |||
179 | Type *Ty = nullptr; | |||
180 | bool IsSExt : 1; | |||
181 | bool IsZExt : 1; | |||
182 | bool IsInReg : 1; | |||
183 | bool IsSRet : 1; | |||
184 | bool IsNest : 1; | |||
185 | bool IsByVal : 1; | |||
186 | bool IsInAlloca : 1; | |||
187 | bool IsReturned : 1; | |||
188 | bool IsSwiftSelf : 1; | |||
189 | bool IsSwiftError : 1; | |||
190 | uint16_t Alignment = 0; | |||
191 | ||||
192 | ArgListEntry() | |||
193 | : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false), | |||
194 | IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false), | |||
195 | IsSwiftSelf(false), IsSwiftError(false) {} | |||
196 | ||||
197 | void setAttributes(const CallBase *Call, unsigned ArgIdx); | |||
198 | ||||
199 | void setAttributes(ImmutableCallSite *CS, unsigned ArgIdx) { | |||
200 | return setAttributes(cast<CallBase>(CS->getInstruction()), ArgIdx); | |||
201 | } | |||
202 | }; | |||
203 | using ArgListTy = std::vector<ArgListEntry>; | |||
204 | ||||
205 | virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC, | |||
206 | ArgListTy &Args) const {} | |||
207 | ||||
208 | static ISD::NodeType getExtendForContent(BooleanContent Content) { | |||
209 | switch (Content) { | |||
210 | case UndefinedBooleanContent: | |||
211 | // Extend by adding rubbish bits. | |||
212 | return ISD::ANY_EXTEND; | |||
213 | case ZeroOrOneBooleanContent: | |||
214 | // Extend by adding zero bits. | |||
215 | return ISD::ZERO_EXTEND; | |||
216 | case ZeroOrNegativeOneBooleanContent: | |||
217 | // Extend by copying the sign bit. | |||
218 | return ISD::SIGN_EXTEND; | |||
219 | } | |||
220 |     llvm_unreachable("Invalid content kind"); | |||
221 | } | |||
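  // Editorial usage sketch (TLI and VT are stand-ins, not names from this
  // file): a combine that widens a SETCC result can derive the extension
  // opcode from the target's boolean content:
  //
  //   ISD::NodeType ExtOp = TargetLoweringBase::getExtendForContent(
  //       TLI.getBooleanContents(VT));
  //
  // e.g. ZeroOrNegativeOneBooleanContent yields ISD::SIGN_EXTEND.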
222 | ||||
223 | /// NOTE: The TargetMachine owns TLOF. | |||
224 | explicit TargetLoweringBase(const TargetMachine &TM); | |||
225 | TargetLoweringBase(const TargetLoweringBase &) = delete; | |||
226 | TargetLoweringBase &operator=(const TargetLoweringBase &) = delete; | |||
227 | virtual ~TargetLoweringBase() = default; | |||
228 | ||||
229 | protected: | |||
230 | /// Initialize all of the actions to default values. | |||
231 | void initActions(); | |||
232 | ||||
233 | public: | |||
234 | const TargetMachine &getTargetMachine() const { return TM; } | |||
235 | ||||
236 | virtual bool useSoftFloat() const { return false; } | |||
237 | ||||
238 | /// Return the pointer type for the given address space, defaults to | |||
239 | /// the pointer type from the data layout. | |||
240 | /// FIXME: The default needs to be removed once all the code is updated. | |||
241 | virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const { | |||
242 | return MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); | |||
243 | } | |||
244 | ||||
245 | /// Return the in-memory pointer type for the given address space, defaults to | |||
246 | /// the pointer type from the data layout. FIXME: The default needs to be | |||
247 | /// removed once all the code is updated. | |||
248 | MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const { | |||
249 | return MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); | |||
250 | } | |||
251 | ||||
252 | /// Return the type for frame index, which is determined by | |||
253 | /// the alloca address space specified through the data layout. | |||
254 | MVT getFrameIndexTy(const DataLayout &DL) const { | |||
255 | return getPointerTy(DL, DL.getAllocaAddrSpace()); | |||
256 | } | |||
257 | ||||
258 | /// Return the type for operands of fence. | |||
259 | /// TODO: Let fence operands be of i32 type and remove this. | |||
260 | virtual MVT getFenceOperandTy(const DataLayout &DL) const { | |||
261 | return getPointerTy(DL); | |||
262 | } | |||
263 | ||||
264 | /// EVT is not used in-tree, but is used by out-of-tree targets. | |||
265 | /// Some documentation for this function would be nice... | |||
266 | virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const; | |||
267 | ||||
268 | EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, | |||
269 | bool LegalTypes = true) const; | |||
270 | ||||
271 | /// Returns the type to be used for the index operand of: | |||
272 | /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT, | |||
273 | /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR | |||
274 | virtual MVT getVectorIdxTy(const DataLayout &DL) const { | |||
275 | return getPointerTy(DL); | |||
276 | } | |||
277 | ||||
278 | virtual bool isSelectSupported(SelectSupportKind /*kind*/) const { | |||
279 | return true; | |||
280 | } | |||
281 | ||||
282 | /// Return true if it is profitable to convert a select of FP constants into | |||
283 | /// a constant pool load whose address depends on the select condition. The | |||
284 | /// parameter may be used to differentiate a select with FP compare from | |||
285 | /// integer compare. | |||
286 | virtual bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const { | |||
287 | return true; | |||
288 | } | |||
289 | ||||
290 | /// Return true if multiple condition registers are available. | |||
291 | bool hasMultipleConditionRegisters() const { | |||
292 | return HasMultipleConditionRegisters; | |||
293 | } | |||
294 | ||||
295 | /// Return true if the target has BitExtract instructions. | |||
296 | bool hasExtractBitsInsn() const { return HasExtractBitsInsn; } | |||
297 | ||||
298 | /// Return the preferred vector type legalization action. | |||
299 | virtual TargetLoweringBase::LegalizeTypeAction | |||
300 | getPreferredVectorAction(MVT VT) const { | |||
301 | // The default action for one-element vectors is to scalarize. | |||
302 | if (VT.getVectorNumElements() == 1) | |||
303 | return TypeScalarizeVector; | |||
304 | // The default action for an odd-width vector is to widen. | |||
305 | if (!VT.isPow2VectorType()) | |||
306 | return TypeWidenVector; | |||
307 | // The default action for other vectors is to promote. | |||
308 | return TypePromoteInteger; | |||
309 | } | |||
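  // Editorial example of how the default implementation above classifies a
  // few types (targets with real vector support typically override this):
  //
  //   getPreferredVectorAction(MVT::v1i64) == TypeScalarizeVector
  //   getPreferredVectorAction(MVT::v3i32) == TypeWidenVector   // not pow-2
  //   getPreferredVectorAction(MVT::v4i8)  == TypePromoteInteger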
310 | ||||
311 | // There are two general methods for expanding a BUILD_VECTOR node: | |||
312 | // 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle | |||
313 | // them together. | |||
314 | // 2. Build the vector on the stack and then load it. | |||
315 | // If this function returns true, then method (1) will be used, subject to | |||
316 | // the constraint that all of the necessary shuffles are legal (as determined | |||
317 | // by isShuffleMaskLegal). If this function returns false, then method (2) is | |||
318 | // always used. The vector type, and the number of defined values, are | |||
319 | // provided. | |||
320 | virtual bool | |||
321 | shouldExpandBuildVectorWithShuffles(EVT /* VT */, | |||
322 | unsigned DefinedValues) const { | |||
323 | return DefinedValues < 3; | |||
324 | } | |||
325 | ||||
326 | /// Return true if integer divide is usually cheaper than a sequence of | |||
327 | /// several shifts, adds, and multiplies for this target. | |||
328 | /// The definition of "cheaper" may depend on whether we're optimizing | |||
329 | /// for speed or for size. | |||
330 | virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; } | |||
331 | ||||
332 | /// Return true if the target can handle a standalone remainder operation. | |||
333 | virtual bool hasStandaloneRem(EVT VT) const { | |||
334 | return true; | |||
335 | } | |||
336 | ||||
337 | /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X). | |||
338 | virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const { | |||
339 | // Default behavior is to replace SQRT(X) with X*RSQRT(X). | |||
340 | return false; | |||
341 | } | |||
342 | ||||
343 | /// Reciprocal estimate status values used by the functions below. | |||
344 | enum ReciprocalEstimate : int { | |||
345 | Unspecified = -1, | |||
346 | Disabled = 0, | |||
347 | Enabled = 1 | |||
348 | }; | |||
349 | ||||
350 | /// Return a ReciprocalEstimate enum value for a square root of the given type | |||
351 | /// based on the function's attributes. If the operation is not overridden by | |||
352 | /// the function's attributes, "Unspecified" is returned and target defaults | |||
353 | /// are expected to be used for instruction selection. | |||
354 | int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const; | |||
355 | ||||
356 | /// Return a ReciprocalEstimate enum value for a division of the given type | |||
357 | /// based on the function's attributes. If the operation is not overridden by | |||
358 | /// the function's attributes, "Unspecified" is returned and target defaults | |||
359 | /// are expected to be used for instruction selection. | |||
360 | int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const; | |||
361 | ||||
362 | /// Return the refinement step count for a square root of the given type based | |||
363 | /// on the function's attributes. If the operation is not overridden by | |||
364 | /// the function's attributes, "Unspecified" is returned and target defaults | |||
365 | /// are expected to be used for instruction selection. | |||
366 | int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const; | |||
367 | ||||
368 | /// Return the refinement step count for a division of the given type based | |||
369 | /// on the function's attributes. If the operation is not overridden by | |||
370 | /// the function's attributes, "Unspecified" is returned and target defaults | |||
371 | /// are expected to be used for instruction selection. | |||
372 | int getDivRefinementSteps(EVT VT, MachineFunction &MF) const; | |||
373 | ||||
374 | /// Returns true if target has indicated at least one type should be bypassed. | |||
375 | bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); } | |||
376 | ||||
377 | /// Returns map of slow types for division or remainder with corresponding | |||
378 | /// fast types | |||
379 | const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const { | |||
380 | return BypassSlowDivWidths; | |||
381 | } | |||
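  // Editorial sketch: a target whose wide divide is slow can seed this map so
  // CodeGen emits a narrow divide guarded by a runtime width check. Targets
  // do this via the protected addBypassSlowDiv() helper; a hypothetical
  // constructor line might read:
  //
  //   addBypassSlowDiv(64, 32); // try a 32-bit divide when operands fit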
382 | ||||
383 | /// Return true if Flow Control is an expensive operation that should be | |||
384 | /// avoided. | |||
385 | bool isJumpExpensive() const { return JumpIsExpensive; } | |||
386 | ||||
387 | /// Return true if selects are only cheaper than branches if the branch is | |||
388 | /// unlikely to be predicted right. | |||
389 | bool isPredictableSelectExpensive() const { | |||
390 | return PredictableSelectIsExpensive; | |||
391 | } | |||
392 | ||||
393 | /// If a branch or a select condition is skewed in one direction by more than | |||
394 | /// this factor, it is very likely to be predicted correctly. | |||
395 | virtual BranchProbability getPredictableBranchThreshold() const; | |||
396 | ||||
397 | /// Return true if the following transform is beneficial: | |||
398 | /// fold (conv (load x)) -> (load (conv*)x) | |||
399 | /// On architectures that don't natively support some vector loads | |||
400 | /// efficiently, casting the load to a smaller vector of larger types and | |||
401 | /// loading is more efficient; however, this can be undone by optimizations in | |||
402 | /// the DAG combiner. | |||
403 | virtual bool isLoadBitCastBeneficial(EVT LoadVT, | |||
404 | EVT BitcastVT) const { | |||
405 | // Don't do if we could do an indexed load on the original type, but not on | |||
406 | // the new one. | |||
407 | if (!LoadVT.isSimple() || !BitcastVT.isSimple()) | |||
408 | return true; | |||
409 | ||||
410 | MVT LoadMVT = LoadVT.getSimpleVT(); | |||
411 | ||||
412 | // Don't bother doing this if it's just going to be promoted again later, as | |||
413 | // doing so might interfere with other combines. | |||
414 | if (getOperationAction(ISD::LOAD, LoadMVT) == Promote && | |||
415 | getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT()) | |||
416 | return false; | |||
417 | ||||
418 | return true; | |||
419 | } | |||
420 | ||||
421 | /// Return true if the following transform is beneficial: | |||
422 | /// (store (y (conv x)), y*) -> (store x, (x*)) | |||
423 | virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT) const { | |||
424 | // Default to the same logic as loads. | |||
425 | return isLoadBitCastBeneficial(StoreVT, BitcastVT); | |||
426 | } | |||
427 | ||||
428 | /// Return true if it is expected to be cheaper to do a store of a non-zero | |||
429 | /// vector constant with the given size and type for the address space than to | |||
430 | /// store the individual scalar element constants. | |||
431 | virtual bool storeOfVectorConstantIsCheap(EVT MemVT, | |||
432 | unsigned NumElem, | |||
433 | unsigned AddrSpace) const { | |||
434 | return false; | |||
435 | } | |||
436 | ||||
437 | /// Allow store merging after legalization in addition to before legalization. | |||
438 | /// This may catch stores that do not exist earlier (eg, stores created from | |||
439 | /// intrinsics). | |||
440 | virtual bool mergeStoresAfterLegalization() const { return true; } | |||
441 | ||||
442 | /// Returns true if it's reasonable to merge stores to MemVT size. | |||
443 | virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, | |||
444 | const SelectionDAG &DAG) const { | |||
445 | return true; | |||
446 | } | |||
447 | ||||
448 | /// Return true if it is cheap to speculate a call to intrinsic cttz. | |||
449 | virtual bool isCheapToSpeculateCttz() const { | |||
450 | return false; | |||
451 | } | |||
452 | ||||
453 | /// Return true if it is cheap to speculate a call to intrinsic ctlz. | |||
454 | virtual bool isCheapToSpeculateCtlz() const { | |||
455 | return false; | |||
456 | } | |||
457 | ||||
458 | /// Return true if ctlz instruction is fast. | |||
459 | virtual bool isCtlzFast() const { | |||
460 | return false; | |||
461 | } | |||
462 | ||||
463 | /// Return true if it is safe to transform an integer-domain bitwise operation | |||
464 | /// into the equivalent floating-point operation. This should be set to true | |||
465 | /// if the target has IEEE-754-compliant fabs/fneg operations for the input | |||
466 | /// type. | |||
467 | virtual bool hasBitPreservingFPLogic(EVT VT) const { | |||
468 | return false; | |||
469 | } | |||
470 | ||||
471 | /// Return true if it is cheaper to split the store of a merged integer value | |||
472 | /// from a pair of smaller values into multiple stores. | |||
473 | virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const { | |||
474 | return false; | |||
475 | } | |||
476 | ||||
477 | /// Return true if the target supports combining a | |||
478 | /// chain like: | |||
479 | /// \code | |||
480 | /// %andResult = and %val1, #mask | |||
481 | /// %icmpResult = icmp %andResult, 0 | |||
482 | /// \endcode | |||
483 | /// into a single machine instruction of a form like: | |||
484 | /// \code | |||
485 | /// cc = test %register, #mask | |||
486 | /// \endcode | |||
487 | virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const { | |||
488 | return false; | |||
489 | } | |||
490 | ||||
491 | /// Use bitwise logic to make pairs of compares more efficient. For example: | |||
492 | /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0 | |||
493 | /// This should be true when it takes more than one instruction to lower | |||
494 | /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on | |||
495 | /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win. | |||
496 | virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const { | |||
497 | return false; | |||
498 | } | |||
499 | ||||
500 | /// Return the preferred operand type if the target has a quick way to compare | |||
501 | /// integer values of the given size. Assume that any legal integer type can | |||
502 | /// be compared efficiently. Targets may override this to allow illegal wide | |||
503 | /// types to return a vector type if there is support to compare that type. | |||
504 | virtual MVT hasFastEqualityCompare(unsigned NumBits) const { | |||
505 | MVT VT = MVT::getIntegerVT(NumBits); | |||
506 | return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE; | |||
507 | } | |||
508 | ||||
509 | /// Return true if the target should transform: | |||
510 | /// (X & Y) == Y ---> (~X & Y) == 0 | |||
511 | /// (X & Y) != Y ---> (~X & Y) != 0 | |||
512 | /// | |||
513 | /// This may be profitable if the target has a bitwise and-not operation that | |||
514 | /// sets comparison flags. A target may want to limit the transformation based | |||
515 | /// on the type of Y or if Y is a constant. | |||
516 | /// | |||
517 | /// Note that the transform will not occur if Y is known to be a power-of-2 | |||
518 | /// because a mask and compare of a single bit can be handled by inverting the | |||
519 | /// predicate, for example: | |||
520 | /// (X & 8) == 8 ---> (X & 8) != 0 | |||
521 | virtual bool hasAndNotCompare(SDValue Y) const { | |||
522 | return false; | |||
523 | } | |||
524 | ||||
525 | /// Return true if the target has a bitwise and-not operation: | |||
526 | /// X = ~A & B | |||
527 | /// This can be used to simplify select or other instructions. | |||
528 | virtual bool hasAndNot(SDValue X) const { | |||
529 | // If the target has the more complex version of this operation, assume that | |||
530 | // it has this operation too. | |||
531 | return hasAndNotCompare(X); | |||
532 | } | |||
533 | ||||
534 | /// There are two ways to clear extreme bits (either low or high): | |||
535 | /// Mask: x & (-1 << y) (the instcombine canonical form) | |||
536 | /// Shifts: x >> y << y | |||
537 | /// Return true if the variant with 2 variable shifts is preferred. | |||
538 | /// Return false if there is no preference. | |||
539 | virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const { | |||
540 | // By default, let's assume that no one prefers shifts. | |||
541 | return false; | |||
542 | } | |||
543 | ||||
544 | /// Return true if it is profitable to fold a pair of shifts into a mask. | |||
545 | /// This is usually true on most targets. But some targets, like Thumb1, | |||
546 | /// have immediate shift instructions, but no immediate "and" instruction; | |||
547 | /// this makes the fold unprofitable. | |||
548 | virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, | |||
549 | CombineLevel Level) const { | |||
550 | return true; | |||
551 | } | |||
552 | ||||
553 | /// Should we transform the IR-optimal check for whether the given truncation | |||
554 | /// down into KeptBits would be truncating or not: | |||
555 | /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits) | |||
556 | /// into its more traditional form: | |||
557 | /// ((%x << C) a>> C) dstcond %x | |||
558 | /// Return true if we should transform. | |||
559 | /// Return false if there is no preference. | |||
560 | virtual bool shouldTransformSignedTruncationCheck(EVT XVT, | |||
561 | unsigned KeptBits) const { | |||
562 | // By default, let's assume that no one prefers shifts. | |||
563 | return false; | |||
564 | } | |||
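  // Editorial worked example of the check described above, for XVT = i32 and
  // KeptBits = 8 (i.e. "does %x fit in i8?"):
  //
  //   IR-optimal form:   (add %x, 128) u< 256
  //   Traditional form:  ((%x << 24) a>> 24) == %x
  //
  // Both hold exactly when %x is in [-128, 127]; this hook decides which
  // shape the target would rather select.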
565 | ||||
566 | /// Return true if the target wants to use the optimization that | |||
567 | /// turns ext(promotableInst1(...(promotableInstN(load)))) into | |||
568 | /// promotedInst1(...(promotedInstN(ext(load)))). | |||
569 | bool enableExtLdPromotion() const { return EnableExtLdPromotion; } | |||
570 | ||||
571 | /// Return true if the target can combine store(extractelement VectorTy, | |||
572 | /// Idx). | |||
573 | /// \p Cost[out] gives the cost of that transformation when this is true. | |||
574 | virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, | |||
575 | unsigned &Cost) const { | |||
576 | return false; | |||
577 | } | |||
578 | ||||
579 | /// Return true if inserting a scalar into a variable element of an undef | |||
580 | /// vector is more efficiently handled by splatting the scalar instead. | |||
581 | virtual bool shouldSplatInsEltVarIndex(EVT) const { | |||
582 | return false; | |||
583 | } | |||
584 | ||||
585 | /// Return true if the target always benefits from combining into FMA for a | |||
586 | /// given value type. This must typically return false on targets where FMA | |||
587 | /// takes more cycles to execute than FADD. | |||
588 | virtual bool enableAggressiveFMAFusion(EVT VT) const { | |||
589 | return false; | |||
590 | } | |||
591 | ||||
592 | /// Return the ValueType of the result of SETCC operations. | |||
593 | virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, | |||
594 | EVT VT) const; | |||
595 | ||||
596 | /// Return the ValueType for comparison libcalls. Comparison libcalls include | |||
597 | /// floating point comparison calls, and Ordered/Unordered check calls on | |||
598 | /// floating point numbers. | |||
599 | virtual | |||
600 | MVT::SimpleValueType getCmpLibcallReturnType() const; | |||
601 | ||||
602 | /// For targets without i1 registers, this gives the nature of the high-bits | |||
603 | /// of boolean values held in types wider than i1. | |||
604 | /// | |||
605 | /// "Boolean values" are special true/false values produced by nodes like | |||
606 | /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND. | |||
607 | /// Not to be confused with general values promoted from i1. Some cpus | |||
608 | /// distinguish between vectors of boolean and scalars; the isVec parameter | |||
609 | /// selects between the two kinds. For example on X86 a scalar boolean should | |||
610 | /// be zero extended from i1, while the elements of a vector of booleans | |||
611 | /// should be sign extended from i1. | |||
612 | /// | |||
613 | /// Some cpus also treat floating point types the same way as they treat | |||
614 | /// vectors instead of the way they treat scalars. | |||
615 | BooleanContent getBooleanContents(bool isVec, bool isFloat) const { | |||
616 | if (isVec) | |||
617 | return BooleanVectorContents; | |||
618 | return isFloat ? BooleanFloatContents : BooleanContents; | |||
619 | } | |||
620 | ||||
621 | BooleanContent getBooleanContents(EVT Type) const { | |||
622 | return getBooleanContents(Type.isVector(), Type.isFloatingPoint()); | |||
623 | } | |||
624 | ||||
625 | /// Return target scheduling preference. | |||
626 | Sched::Preference getSchedulingPreference() const { | |||
627 | return SchedPreferenceInfo; | |||
628 | } | |||
629 | ||||
630 | /// Some schedulers, e.g. hybrid, can switch to different scheduling heuristics | |||
631 | /// for different nodes. This function returns the preference (or none) for | |||
632 | /// the given node. | |||
633 | virtual Sched::Preference getSchedulingPreference(SDNode *) const { | |||
634 | return Sched::None; | |||
635 | } | |||
636 | ||||
637 | /// Return the register class that should be used for the specified value | |||
638 | /// type. | |||
639 | virtual const TargetRegisterClass *getRegClassFor(MVT VT) const { | |||
640 | const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy]; | |||
641 |     assert(RC && "This value type is not natively supported!"); | |||
642 | return RC; | |||
643 | } | |||
644 | ||||
645 | /// Return the 'representative' register class for the specified value | |||
646 | /// type. | |||
647 | /// | |||
648 | /// The 'representative' register class is the largest legal super-reg | |||
649 | /// register class for the register class of the value type. For example, on | |||
650 | /// i386 the rep register class for i8, i16, and i32 are GR32; while the rep | |||
651 | /// register class is GR64 on x86_64. | |||
652 | virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const { | |||
653 | const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy]; | |||
654 | return RC; | |||
655 | } | |||
656 | ||||
657 | /// Return the cost of the 'representative' register class for the specified | |||
658 | /// value type. | |||
659 | virtual uint8_t getRepRegClassCostFor(MVT VT) const { | |||
660 | return RepRegClassCostForVT[VT.SimpleTy]; | |||
661 | } | |||
662 | ||||
663 | /// Return true if SHIFT instructions should be expanded to SHIFT_PARTS | |||
664 | /// instructions, and false if a library call is preferred (e.g. for code-size | |||
665 | /// reasons). | |||
666 | virtual bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const { | |||
667 | return true; | |||
668 | } | |||
669 | ||||
670 | /// Return true if the target has native support for the specified value type. | |||
671 | /// This means that it has a register that directly holds it without | |||
672 | /// promotions or expansions. | |||
673 | bool isTypeLegal(EVT VT) const { | |||
674 |     assert(!VT.isSimple() || | |||
675 |            (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT)); | |||
676 | return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr; | |||
677 | } | |||
678 | ||||
679 | class ValueTypeActionImpl { | |||
680 | /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum | |||
681 | /// that indicates how instruction selection should deal with the type. | |||
682 | LegalizeTypeAction ValueTypeActions[MVT::LAST_VALUETYPE]; | |||
683 | ||||
684 | public: | |||
685 | ValueTypeActionImpl() { | |||
686 | std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions), | |||
687 | TypeLegal); | |||
688 | } | |||
689 | ||||
690 | LegalizeTypeAction getTypeAction(MVT VT) const { | |||
691 | return ValueTypeActions[VT.SimpleTy]; | |||
692 | } | |||
693 | ||||
694 | void setTypeAction(MVT VT, LegalizeTypeAction Action) { | |||
695 | ValueTypeActions[VT.SimpleTy] = Action; | |||
696 | } | |||
697 | }; | |||
698 | ||||
699 | const ValueTypeActionImpl &getValueTypeActions() const { | |||
700 | return ValueTypeActions; | |||
701 | } | |||
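  // Editorial usage sketch (hypothetical target-construction code; the
  // legalizer normally fills this table itself):
  //
  //   ValueTypeActions.setTypeAction(MVT::v2f32, TypeWidenVector);
  //   assert(ValueTypeActions.getTypeAction(MVT::v2f32) == TypeWidenVector);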
702 | ||||
703 | /// Return how we should legalize values of this type, either it is already | |||
704 | /// legal (return 'Legal') or we need to promote it to a larger type (return | |||
705 | /// 'Promote'), or we need to expand it into multiple registers of smaller | |||
706 | /// integer type (return 'Expand'). 'Custom' is not an option. | |||
707 | LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const { | |||
708 | return getTypeConversion(Context, VT).first; | |||
709 | } | |||
710 | LegalizeTypeAction getTypeAction(MVT VT) const { | |||
711 | return ValueTypeActions.getTypeAction(VT); | |||
712 | } | |||
713 | ||||
714 | /// For types supported by the target, this is an identity function. For | |||
715 | /// types that must be promoted to larger types, this returns the larger type | |||
716 | /// to promote to. For integer types that are larger than the largest integer | |||
717 | /// register, this contains one step in the expansion to get to the smaller | |||
718 | /// register. For illegal floating point types, this returns the integer type | |||
719 | /// to transform to. | |||
720 | EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const { | |||
721 | return getTypeConversion(Context, VT).second; | |||
722 | } | |||
723 | ||||
724 | /// For types supported by the target, this is an identity function. For | |||
725 | /// types that must be expanded (i.e. integer types that are larger than the | |||
726 | /// largest integer register or illegal floating point types), this returns | |||
727 | /// the largest legal type it will be expanded to. | |||
728 | EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const { | |||
729 |     assert(!VT.isVector()); | |||
730 | while (true) { | |||
731 | switch (getTypeAction(Context, VT)) { | |||
732 | case TypeLegal: | |||
733 | return VT; | |||
734 | case TypeExpandInteger: | |||
735 | VT = getTypeToTransformTo(Context, VT); | |||
736 | break; | |||
737 | default: | |||
738 | llvm_unreachable("Type is not legal nor is it to be expanded!")::llvm::llvm_unreachable_internal("Type is not legal nor is it to be expanded!" , "/build/llvm-toolchain-snapshot-9~svn359999/include/llvm/CodeGen/TargetLowering.h" , 738); | |||
739 | } | |||
740 | } | |||
741 | } | |||
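  // Editorial example: on a target whose widest legal integer register is
  // 64 bits, getTypeToExpandTo(Ctx, i128) takes one TypeExpandInteger step
  // (i128 -> i64) and returns i64; i256 takes two (i256 -> i128 -> i64)
  // before hitting TypeLegal.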
742 | ||||
743 | /// Vector types are broken down into some number of legal first class types. | |||
744 | /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8 | |||
745 | /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64 | |||
746 | /// turns into 4 EVT::i32 values with both PPC and X86. | |||
747 | /// | |||
748 | /// This method returns the number of registers needed, and the VT for each | |||
749 | /// register. It also returns the VT and quantity of the intermediate values | |||
750 | /// before they are promoted/expanded. | |||
751 | unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT, | |||
752 | EVT &IntermediateVT, | |||
753 | unsigned &NumIntermediates, | |||
754 | MVT &RegisterVT) const; | |||
755 | ||||
756 | /// Certain targets such as MIPS require that some types such as vectors are | |||
757 | /// always broken down into scalars in some contexts. This occurs even if the | |||
758 | /// vector type is legal. | |||
759 | virtual unsigned getVectorTypeBreakdownForCallingConv( | |||
760 | LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, | |||
761 | unsigned &NumIntermediates, MVT &RegisterVT) const { | |||
762 | return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates, | |||
763 | RegisterVT); | |||
764 | } | |||
765 | ||||
766 | struct IntrinsicInfo { | |||
767 | unsigned opc = 0; // target opcode | |||
768 | EVT memVT; // memory VT | |||
769 | ||||
770 | // value representing memory location | |||
771 | PointerUnion<const Value *, const PseudoSourceValue *> ptrVal; | |||
772 | ||||
773 | int offset = 0; // offset off of ptrVal | |||
774 | unsigned size = 0; // the size of the memory location | |||
775 | // (taken from memVT if zero) | |||
776 | unsigned align = 1; // alignment | |||
777 | ||||
778 | MachineMemOperand::Flags flags = MachineMemOperand::MONone; | |||
779 | IntrinsicInfo() = default; | |||
780 | }; | |||
781 | ||||
782 | /// Given an intrinsic, checks if on the target the intrinsic will need to map | |||
783 | /// to a MemIntrinsicNode (touches memory). If this is the case, it returns | |||
784 | /// true and stores the intrinsic information into the IntrinsicInfo that was | |||
785 | /// passed to the function. | |||
786 | virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &, | |||
787 | MachineFunction &, | |||
788 | unsigned /*Intrinsic*/) const { | |||
789 | return false; | |||
790 | } | |||
791 | ||||
792 | /// Returns true if the target can instruction select the specified FP | |||
793 | /// immediate natively. If false, the legalizer will materialize the FP | |||
794 | /// immediate as a load from a constant pool. | |||
795 | virtual bool isFPImmLegal(const APFloat &/*Imm*/, EVT /*VT*/, | |||
796 | bool ForCodeSize = false) const { | |||
797 | return false; | |||
798 | } | |||
799 | ||||
800 | /// Targets can use this to indicate that they only support *some* | |||
801 | /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a | |||
802 | /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to be | |||
803 | /// legal. | |||
804 | virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const { | |||
805 | return true; | |||
806 | } | |||
807 | ||||
808 | /// Returns true if the operation can trap for the value type. | |||
809 | /// | |||
810 | /// VT must be a legal type. By default, we optimistically assume most | |||
811 | /// operations don't trap except for integer divide and remainder. | |||
812 | virtual bool canOpTrap(unsigned Op, EVT VT) const; | |||
813 | ||||
814 | /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there | |||
815 | /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a | |||
816 | /// constant pool entry. | |||
817 | virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/, | |||
818 | EVT /*VT*/) const { | |||
819 | return false; | |||
820 | } | |||
821 | ||||
822 | /// Return how this operation should be treated: either it is legal, needs to | |||
823 | /// be promoted to a larger size, needs to be expanded to some other code | |||
824 | /// sequence, or the target has a custom expander for it. | |||
825 | LegalizeAction getOperationAction(unsigned Op, EVT VT) const { | |||
826 | if (VT.isExtended()) return Expand; | |||
827 | // If a target-specific SDNode requires legalization, require the target | |||
828 | // to provide custom legalization for it. | |||
829 | if (Op >= array_lengthof(OpActions[0])) return Custom; | |||
830 | return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op]; | |||
831 | } | |||
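  // Editorial usage sketch: legalization decisions typically switch on the
  // returned action (TLI is a stand-in for any TargetLoweringBase instance):
  //
  //   switch (TLI.getOperationAction(ISD::SDIV, MVT::i64)) {
  //   case TargetLoweringBase::Legal:   /* select directly */          break;
  //   case TargetLoweringBase::Custom:  /* call LowerOperation */      break;
  //   case TargetLoweringBase::LibCall: /* emit a __divdi3 call */     break;
  //   default:                          /* expand to other nodes */    break;
  //   }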
832 | ||||
833 | /// Custom method defined by each target to indicate if an operation which | |||
834 | /// may require a scale is supported natively by the target. | |||
835 | /// If not, the operation is illegal. | |||
836 | virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT, | |||
837 | unsigned Scale) const { | |||
838 | return false; | |||
839 | } | |||
840 | ||||
841 | /// Some fixed point operations may be natively supported by the target but | |||
842 | /// only for specific scales. This method allows for checking | |||
843 | /// if the width is supported by the target for a given operation that may | |||
844 | /// depend on scale. | |||
845 | LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT, | |||
846 | unsigned Scale) const { | |||
847 | auto Action = getOperationAction(Op, VT); | |||
848 | if (Action != Legal) | |||
849 | return Action; | |||
850 | ||||
851 | // This operation is supported in this type but may only work on specific | |||
852 | // scales. | |||
853 | bool Supported; | |||
854 | switch (Op) { | |||
855 | default: | |||
856 | llvm_unreachable("Unexpected fixed point operation.")::llvm::llvm_unreachable_internal("Unexpected fixed point operation." , "/build/llvm-toolchain-snapshot-9~svn359999/include/llvm/CodeGen/TargetLowering.h" , 856); | |||
857 | case ISD::SMULFIX: | |||
858 | case ISD::UMULFIX: | |||
859 | Supported = isSupportedFixedPointOperation(Op, VT, Scale); | |||
860 | break; | |||
861 | } | |||
862 | ||||
863 | return Supported ? Action : Expand; | |||
864 | } | |||
865 | ||||
866 | LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const { | |||
867 | unsigned EqOpc; | |||
868 | switch (Op) { | |||
869 |     default: llvm_unreachable("Unexpected FP pseudo-opcode"); | |||
870 | case ISD::STRICT_FADD: EqOpc = ISD::FADD; break; | |||
871 | case ISD::STRICT_FSUB: EqOpc = ISD::FSUB; break; | |||
872 | case ISD::STRICT_FMUL: EqOpc = ISD::FMUL; break; | |||
873 | case ISD::STRICT_FDIV: EqOpc = ISD::FDIV; break; | |||
874 | case ISD::STRICT_FREM: EqOpc = ISD::FREM; break; | |||
875 | case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break; | |||
876 | case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break; | |||
877 | case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break; | |||
878 | case ISD::STRICT_FMA: EqOpc = ISD::FMA; break; | |||
879 | case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break; | |||
880 | case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break; | |||
881 | case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break; | |||
882 | case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break; | |||
883 | case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break; | |||
884 | case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break; | |||
885 | case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break; | |||
886 | case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break; | |||
887 | case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break; | |||
888 | case ISD::STRICT_FMAXNUM: EqOpc = ISD::FMAXNUM; break; | |||
889 | case ISD::STRICT_FMINNUM: EqOpc = ISD::FMINNUM; break; | |||
890 | case ISD::STRICT_FCEIL: EqOpc = ISD::FCEIL; break; | |||
891 | case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break; | |||
892 | case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break; | |||
893 | case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break; | |||
894 | } | |||
895 | ||||
896 | auto Action = getOperationAction(EqOpc, VT); | |||
897 | ||||
898 | // We don't currently handle Custom or Promote for strict FP pseudo-ops. | |||
899 | // For now, we just expand for those cases. | |||
900 | if (Action != Legal) | |||
901 | Action = Expand; | |||
902 | ||||
903 | return Action; | |||
904 | } | |||
905 | ||||
906 | /// Return true if the specified operation is legal on this target or can be | |||
907 | /// made legal with custom lowering. This is used to help guide high-level | |||
908 | /// lowering decisions. | |||
909 | bool isOperationLegalOrCustom(unsigned Op, EVT VT) const { | |||
910 | return (VT == MVT::Other || isTypeLegal(VT)) && | |||
911 | (getOperationAction(Op, VT) == Legal || | |||
912 | getOperationAction(Op, VT) == Custom); | |||
913 | } | |||
914 | ||||
915 | /// Return true if the specified operation is legal on this target or can be | |||
916 | /// made legal using promotion. This is used to help guide high-level lowering | |||
917 | /// decisions. | |||
918 | bool isOperationLegalOrPromote(unsigned Op, EVT VT) const { | |||
919 | return (VT == MVT::Other || isTypeLegal(VT)) && | |||
920 | (getOperationAction(Op, VT) == Legal || | |||
921 | getOperationAction(Op, VT) == Promote); | |||
922 | } | |||
923 | ||||
924 | /// Return true if the specified operation is legal on this target or can be | |||
925 | /// made legal with custom lowering or using promotion. This is used to help | |||
926 | /// guide high-level lowering decisions. | |||
927 | bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT) const { | |||
928 | return (VT == MVT::Other || isTypeLegal(VT)) && | |||
929 | (getOperationAction(Op, VT) == Legal || | |||
930 | getOperationAction(Op, VT) == Custom || | |||
931 | getOperationAction(Op, VT) == Promote); | |||
932 | } | |||
933 | ||||
934 | /// Return true if the operation uses custom lowering, regardless of whether | |||
935 | /// the type is legal or not. | |||
936 | bool isOperationCustom(unsigned Op, EVT VT) const { | |||
937 | return getOperationAction(Op, VT) == Custom; | |||
938 | } | |||
939 | ||||
940 | /// Return true if lowering to a jump table is allowed. | |||
941 | virtual bool areJTsAllowed(const Function *Fn) const { | |||
942 | if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true") | |||
943 | return false; | |||
944 | ||||
945 | return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || | |||
946 | isOperationLegalOrCustom(ISD::BRIND, MVT::Other); | |||
947 | } | |||
948 | ||||
949 | /// Check whether the range [Low,High] fits in a machine word. | |||
950 | bool rangeFitsInWord(const APInt &Low, const APInt &High, | |||
951 | const DataLayout &DL) const { | |||
952 | // FIXME: Using the pointer type doesn't seem ideal. | |||
953 | uint64_t BW = DL.getIndexSizeInBits(0u); | |||
954 |     uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; | |||
955 | return Range <= BW; | |||
956 | } | |||
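  // Editorial worked example: with a 64-bit index type, Low = 10 and
  // High = 73 give Range = (73 - 10) + 1 = 64 <= 64, so the range fits in a
  // machine word; Low = 0 and High = 64 (Range = 65) would not.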
957 | ||||
958 | /// Return true if lowering to a jump table is suitable for a set of case | |||
959 | /// clusters which may contain \p NumCases cases spanning \p Range values. | |||
960 | /// FIXME: This function checks the maximum table size and density, but not | |||
961 | /// the minimum size. It would be nice if the minimum size check were also | |||
962 | /// folded into this function. Currently, it is performed in findJumpTable() | |||
963 | /// in SelectionDAGBuilder and getEstimatedNumberOfCaseClusters() in | |||
964 | /// BasicTTIImpl. | |||
965 | virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, | |||
966 | uint64_t Range) const { | |||
967 | const bool OptForSize = SI->getParent()->getParent()->hasOptSize(); | |||
968 | const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize); | |||
969 | const unsigned MaxJumpTableSize = | |||
970 |         OptForSize ? UINT_MAX : getMaximumJumpTableSize(); | |||
971 | // Check whether a range of clusters is dense enough for a jump table. | |||
972 | if (Range <= MaxJumpTableSize && | |||
973 | (NumCases * 100 >= Range * MinDensity)) { | |||
974 | return true; | |||
975 | } | |||
976 | return false; | |||
977 | } | |||
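  // Editorial worked example of the density test above, assuming a minimum
  // density of 40% (the real value comes from getMinimumJumpTableDensity and
  // depends on the target and on opt-for-size): NumCases = 40 over
  // Range = 100 gives 40 * 100 >= 100 * 40, so a jump table is acceptable;
  // NumCases = 20 over the same range (2000 < 4000) is rejected.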
978 | ||||
979 | /// Return true if lowering to a bit test is suitable for a set of case | |||
980 | /// clusters which contains \p NumDests unique destinations, \p Low and | |||
981 | /// \p High as its lowest and highest case values, and expects \p NumCmps | |||
982 | /// case value comparisons. Check if the number of destinations, comparison | |||
983 | /// metric, and range are all suitable. | |||
984 | bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, | |||
985 | const APInt &Low, const APInt &High, | |||
986 | const DataLayout &DL) const { | |||
987 | // FIXME: I don't think NumCmps is the correct metric: a single case and a | |||
988 | // range of cases both require only one branch to lower. Just looking at the | |||
989 | // number of clusters and destinations should be enough to decide whether to | |||
990 | // build bit tests. | |||
991 | ||||
992 | // To lower a range with bit tests, the range must fit the bitwidth of a | |||
993 | // machine word. | |||
994 | if (!rangeFitsInWord(Low, High, DL)) | |||
995 | return false; | |||
996 | ||||
997 | // Decide whether it's profitable to lower this range with bit tests. Each | |||
998 | // destination requires a bit test and branch, and there is an overall range | |||
999 | // check branch. For a small number of clusters, separate comparisons might | |||
1000 | // be cheaper, and for many destinations, splitting the range might be | |||
1001 | // better. | |||
1002 | return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) || | |||
1003 | (NumDests == 3 && NumCmps >= 6); | |||
1004 | } | |||
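  // Editorial worked example: a switch with NumDests = 2 and NumCmps = 5
  // whose case values span [0, 63] fits in a 64-bit word and meets the
  // "2 destinations need >= 5 comparisons" threshold above, so bit tests are
  // used; the same shape with only 4 comparisons falls back to other
  // lowering strategies.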
1005 | ||||
1006 | /// Return true if the specified operation is illegal on this target or | |||
1007 | /// unlikely to be made legal with custom lowering. This is used to help guide | |||
1008 | /// high-level lowering decisions. | |||
1009 | bool isOperationExpand(unsigned Op, EVT VT) const { | |||
1010 | return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand); | |||
1011 | } | |||
1012 | ||||
1013 | /// Return true if the specified operation is legal on this target. | |||
1014 | bool isOperationLegal(unsigned Op, EVT VT) const { | |||
1015 | return (VT == MVT::Other || isTypeLegal(VT)) && | |||
1016 | getOperationAction(Op, VT) == Legal; | |||
1017 | } | |||
1018 | ||||
1019 | /// Return how this load with extension should be treated: either it is legal, | |||
1020 | /// needs to be promoted to a larger size, needs to be expanded to some other | |||
1021 | /// code sequence, or the target has a custom expander for it. | |||
1022 | LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT, | |||
1023 | EVT MemVT) const { | |||
1024 | if (ValVT.isExtended() || MemVT.isExtended()) return Expand; | |||
1025 | unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy; | |||
1026 | unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; | |||
1027 |     assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE && | |||
1028 |            MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!"); | |||
1029 | unsigned Shift = 4 * ExtType; | |||
1030 | return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf); | |||
1031 | } | |||
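  // Editorial note on the packing used above: LoadExtActions holds one 4-bit
  // LegalizeAction per extension type, so ExtType selects a nibble. For
  // example, the action for a sign-extending load would be recovered as:
  //
  //   (LegalizeAction)((LoadExtActions[ValI][MemI] >> (4 * ISD::SEXTLOAD)) & 0xf)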
1032 | ||||
1033 | /// Return true if the specified load with extension is legal on this target. | |||
1034 | bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const { | |||
1035 | return getLoadExtAction(ExtType, ValVT, MemVT) == Legal; | |||
1036 | } | |||
1037 | ||||
1038 | /// Return true if the specified load with extension is legal or custom | |||
1039 | /// on this target. | |||
1040 | bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const { | |||
1041 | return getLoadExtAction(ExtType, ValVT, MemVT) == Legal || | |||
1042 | getLoadExtAction(ExtType, ValVT, MemVT) == Custom; | |||
1043 | } | |||
1044 | ||||
1045 | /// Return how this store with truncation should be treated: either it is | |||
1046 | /// legal, needs to be promoted to a larger size, needs to be expanded to some | |||
1047 | /// other code sequence, or the target has a custom expander for it. | |||
1048 | LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const { | |||
1049 | if (ValVT.isExtended() || MemVT.isExtended()) return Expand; | |||
1050 | unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy; | |||
1051 | unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; | |||
1052 | assert(ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && | |||
1053 | "Table isn't big enough!"); | |||
1054 | return TruncStoreActions[ValI][MemI]; | |||
1055 | } | |||
1056 | ||||
1057 | /// Return true if the specified store with truncation is legal on this | |||
1058 | /// target. | |||
1059 | bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const { | |||
1060 | return isTypeLegal(ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal; | |||
1061 | } | |||
1062 | ||||
1063 | /// Return true if the specified store with truncation is legal or custom on | |||
1064 | /// this target. | |||
1065 | bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const { | |||
1066 | return isTypeLegal(ValVT) && | |||
1067 | (getTruncStoreAction(ValVT, MemVT) == Legal || | |||
1068 | getTruncStoreAction(ValVT, MemVT) == Custom); | |||
1069 | } | |||
1070 | ||||
1071 | /// Return how the indexed load should be treated: either it is legal, needs | |||
1072 | /// to be promoted to a larger size, needs to be expanded to some other code | |||
1073 | /// sequence, or the target has a custom expander for it. | |||
1074 | LegalizeAction | |||
1075 | getIndexedLoadAction(unsigned IdxMode, MVT VT) const { | |||
1076 | assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && | |||
1077 | "Table isn't big enough!"); | |||
1078 | unsigned Ty = (unsigned)VT.SimpleTy; | |||
1079 | return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] & 0xf0) >> 4); | |||
1080 | } | |||
1081 | ||||
1082 | /// Return true if the specified indexed load is legal or custom on this target. | |||
1083 | bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const { | |||
1084 | return VT.isSimple() && | |||
1085 | (getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Legal || | |||
1086 | getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Custom); | |||
1087 | } | |||
1088 | ||||
1089 | /// Return how the indexed store should be treated: either it is legal, needs | |||
1090 | /// to be promoted to a larger size, needs to be expanded to some other code | |||
1091 | /// sequence, or the target has a custom expander for it. | |||
1092 | LegalizeAction | |||
1093 | getIndexedStoreAction(unsigned IdxMode, MVT VT) const { | |||
1094 | assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && | |||
1095 | "Table isn't big enough!"); | |||
1096 | unsigned Ty = (unsigned)VT.SimpleTy; | |||
1097 | return (LegalizeAction)(IndexedModeActions[Ty][IdxMode] & 0x0f); | |||
1098 | } | |||
1099 | ||||
1100 | /// Return true if the specified indexed store is legal or custom on this target. | |||
1101 | bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const { | |||
1102 | return VT.isSimple() && | |||
1103 | (getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Legal || | |||
1104 | getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom); | |||
1105 | } | |||
1106 | ||||
1107 | /// Return how the condition code should be treated: either it is legal, needs | |||
1108 | /// to be expanded to some other code sequence, or the target has a custom | |||
1109 | /// expander for it. | |||
1110 | LegalizeAction | |||
1111 | getCondCodeAction(ISD::CondCode CC, MVT VT) const { | |||
1112 | assert((unsigned)CC < array_lengthof(CondCodeActions) && | |||
1113 | ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) && | |||
1114 | "Table isn't big enough!"); | |||
1115 | // See setCondCodeAction for how this is encoded. | |||
1116 | uint32_t Shift = 4 * (VT.SimpleTy & 0x7); | |||
1117 | uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3]; | |||
1118 | LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF); | |||
1119 | assert(Action != Promote && "Can't promote condition code!"); | |||
1120 | return Action; | |||
1121 | } | |||
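// Note on the encoding above: CondCodeActions packs eight value types per
// uint32_t, four bits each. VT.SimpleTy >> 3 selects the word and
// 4 * (VT.SimpleTy & 0x7) the nibble; e.g. SimpleTy == 13 lands in word 1
// at bits [23:20] of CondCodeActions[CC][1].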
1122 | ||||
1123 | /// Return true if the specified condition code is legal on this target. | |||
1124 | bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const { | |||
1125 | return getCondCodeAction(CC, VT) == Legal; | |||
1126 | } | |||
1127 | ||||
1128 | /// Return true if the specified condition code is legal or custom on this | |||
1129 | /// target. | |||
1130 | bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const { | |||
1131 | return getCondCodeAction(CC, VT) == Legal || | |||
1132 | getCondCodeAction(CC, VT) == Custom; | |||
1133 | } | |||
1134 | ||||
1135 | /// If the action for this operation is to promote, this method returns the | |||
1136 | /// ValueType to promote to. | |||
1137 | MVT getTypeToPromoteTo(unsigned Op, MVT VT) const { | |||
1138 | assert(getOperationAction(Op, VT) == Promote && | |||
1139 | "This operation isn't promoted!"); | |||
1140 | ||||
1141 | // See if this has an explicit type specified. | |||
1142 | std::map<std::pair<unsigned, MVT::SimpleValueType>, | |||
1143 | MVT::SimpleValueType>::const_iterator PTTI = | |||
1144 | PromoteToType.find(std::make_pair(Op, VT.SimpleTy)); | |||
1145 | if (PTTI != PromoteToType.end()) return PTTI->second; | |||
1146 | ||||
1147 | assert((VT.isInteger() || VT.isFloatingPoint()) && | |||
1148 | "Cannot autopromote this type, add it with AddPromotedToType."); | |||
1149 | ||||
1150 | MVT NVT = VT; | |||
1151 | do { | |||
1152 | NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1); | |||
1153 | assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid && | |||
1154 | "Didn't find type to promote to!"); | |||
1155 | } while (!isTypeLegal(NVT) || | |||
1156 | getOperationAction(Op, NVT) == Promote); | |||
1157 | return NVT; | |||
1158 | } | |||
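// Illustrative trace of the walk above (op and types chosen arbitrarily):
// if ISD::CTLZ on MVT::i16 is marked Promote and no AddPromotedToType entry
// exists, the loop steps to the next enumeration value, MVT::i32, and
// returns it once it is a legal type whose ISD::CTLZ action is not Promote.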
1159 | ||||
1160 | /// Return the EVT corresponding to this LLVM type. This is determined by the | |||
1161 | /// LLVM IR type alone, except that the pointer size comes from the DataLayout. | |||
1162 | /// If AllowUnknown is true, this will return MVT::Other for types with no EVT | |||
1163 | /// counterpart (e.g. structs), otherwise it will assert. | |||
1164 | EVT getValueType(const DataLayout &DL, Type *Ty, | |||
1165 | bool AllowUnknown = false) const { | |||
1166 | // Lower scalar pointers to native pointer types. | |||
1167 | if (PointerType *PTy = dyn_cast<PointerType>(Ty)) | |||
1168 | return getPointerTy(DL, PTy->getAddressSpace()); | |||
1169 | ||||
1170 | if (Ty->isVectorTy()) { | |||
1171 | VectorType *VTy = cast<VectorType>(Ty); | |||
1172 | Type *Elm = VTy->getElementType(); | |||
1173 | // Lower vectors of pointers to native pointer types. | |||
1174 | if (PointerType *PT = dyn_cast<PointerType>(Elm)) { | |||
1175 | EVT PointerTy(getPointerTy(DL, PT->getAddressSpace())); | |||
1176 | Elm = PointerTy.getTypeForEVT(Ty->getContext()); | |||
1177 | } | |||
1178 | ||||
1179 | return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false), | |||
1180 | VTy->getNumElements()); | |||
1181 | } | |||
1182 | return EVT::getEVT(Ty, AllowUnknown); | |||
1183 | } | |||
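// Illustrative sketch, assuming a DataLayout with 64-bit pointers (Ctx, DL
// and TLI stand in for a real context, layout and lowering object):
//
//   Type *VecOfPtrs = VectorType::get(Type::getInt8PtrTy(Ctx), 4); // <4 x i8*>
//   EVT VT = TLI.getValueType(DL, VecOfPtrs);
//   // VT is v4i64: the pointer element is first rewritten to the native
//   // pointer-sized integer, then the vector EVT is formed.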
1184 | ||||
1185 | EVT getMemValueType(const DataLayout &DL, Type *Ty, | |||
1186 | bool AllowUnknown = false) const { | |||
1187 | // Lower scalar pointers to native pointer types. | |||
1188 | if (PointerType *PTy = dyn_cast<PointerType>(Ty)) | |||
1189 | return getPointerMemTy(DL, PTy->getAddressSpace()); | |||
1190 | else if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { | |||
1191 | Type *Elm = VTy->getElementType(); | |||
1192 | if (PointerType *PT = dyn_cast<PointerType>(Elm)) { | |||
1193 | EVT PointerTy(getPointerMemTy(DL, PT->getAddressSpace())); | |||
1194 | Elm = PointerTy.getTypeForEVT(Ty->getContext()); | |||
1195 | } | |||
1196 | return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false), | |||
1197 | VTy->getNumElements()); | |||
1198 | } | |||
1199 | ||||
1200 | return getValueType(DL, Ty, AllowUnknown); | |||
1201 | } | |||
1202 | ||||
1203 | ||||
1204 | /// Return the MVT corresponding to this LLVM type. See getValueType. | |||
1205 | MVT getSimpleValueType(const DataLayout &DL, Type *Ty, | |||
1206 | bool AllowUnknown = false) const { | |||
1207 | return getValueType(DL, Ty, AllowUnknown).getSimpleVT(); | |||
1208 | } | |||
1209 | ||||
1210 | /// Return the desired alignment for ByVal or InAlloca aggregate function | |||
1211 | /// arguments in the caller parameter area. This is the actual alignment, not | |||
1212 | /// its logarithm. | |||
1213 | virtual unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const; | |||
1214 | ||||
1215 | /// Return the type of registers that this ValueType will eventually require. | |||
1216 | MVT getRegisterType(MVT VT) const { | |||
1217 | assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT)); | |||
1218 | return RegisterTypeForVT[VT.SimpleTy]; | |||
1219 | } | |||
1220 | ||||
1221 | /// Return the type of registers that this ValueType will eventually require. | |||
1222 | MVT getRegisterType(LLVMContext &Context, EVT VT) const { | |||
1223 | if (VT.isSimple()) { | |||
1224 | assert((unsigned)VT.getSimpleVT().SimpleTy < | |||
1225 | array_lengthof(RegisterTypeForVT)); | |||
1226 | return RegisterTypeForVT[VT.getSimpleVT().SimpleTy]; | |||
1227 | } | |||
1228 | if (VT.isVector()) { | |||
1229 | EVT VT1; | |||
1230 | MVT RegisterVT; | |||
1231 | unsigned NumIntermediates; | |||
1232 | (void)getVectorTypeBreakdown(Context, VT, VT1, | |||
1233 | NumIntermediates, RegisterVT); | |||
1234 | return RegisterVT; | |||
1235 | } | |||
1236 | if (VT.isInteger()) { | |||
1237 | return getRegisterType(Context, getTypeToTransformTo(Context, VT)); | |||
1238 | } | |||
1239 | llvm_unreachable("Unsupported extended type!"); | |||
1240 | } | |||
1241 | ||||
1242 | /// Return the number of registers that this ValueType will eventually | |||
1243 | /// require. | |||
1244 | /// | |||
1245 | /// This is one for any types promoted to live in larger registers, but may be | |||
1246 | /// more than one for types (like i64) that are split into pieces. For types | |||
1247 | /// like i140, which are first promoted then expanded, it is the number of | |||
1248 | /// registers needed to hold all the bits of the original type. For an i140 | |||
1249 | /// on a 32 bit machine this means 5 registers. | |||
1250 | unsigned getNumRegisters(LLVMContext &Context, EVT VT) const { | |||
1251 | if (VT.isSimple()) { | |||
1252 | assert((unsigned)VT.getSimpleVT().SimpleTy < | |||
1253 | array_lengthof(NumRegistersForVT)); | |||
1254 | return NumRegistersForVT[VT.getSimpleVT().SimpleTy]; | |||
1255 | } | |||
1256 | if (VT.isVector()) { | |||
1257 | EVT VT1; | |||
1258 | MVT VT2; | |||
1259 | unsigned NumIntermediates; | |||
1260 | return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2); | |||
1261 | } | |||
1262 | if (VT.isInteger()) { | |||
1263 | unsigned BitWidth = VT.getSizeInBits(); | |||
1264 | unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits(); | |||
1265 | return (BitWidth + RegWidth - 1) / RegWidth; | |||
1266 | } | |||
1267 | llvm_unreachable("Unsupported extended type!"); | |||
1268 | } | |||
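// Tracing the i140 example from the comment above: i140 is neither simple
// nor a vector, so the VT.isInteger() branch runs with BitWidth == 140. On
// a 32-bit machine getRegisterType eventually settles on an i32 register
// type, giving (140 + 32 - 1) / 32 == 5 registers.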
1269 | ||||
1270 | /// Certain combinations of ABIs, Targets and features require that types | |||
1271 | /// are legal for some operations and not for other operations. | |||
1272 | /// For MIPS all vector types must be passed through the integer register set. | |||
1273 | virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, | |||
1274 | CallingConv::ID CC, EVT VT) const { | |||
1275 | return getRegisterType(Context, VT); | |||
1276 | } | |||
1277 | ||||
1278 | /// Certain targets require unusual breakdowns of certain types. For MIPS, | |||
1279 | /// this occurs when a vector type is used, as vectors are passed through the | |||
1280 | /// integer register set. | |||
1281 | virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, | |||
1282 | CallingConv::ID CC, | |||
1283 | EVT VT) const { | |||
1284 | return getNumRegisters(Context, VT); | |||
1285 | } | |||
1286 | ||||
1287 | /// Certain targets have context-sensitive alignment requirements, where one | |||
1288 | /// type has the alignment requirement of another type. | |||
1289 | virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy, | |||
1290 | DataLayout DL) const { | |||
1291 | return DL.getABITypeAlignment(ArgTy); | |||
1292 | } | |||
1293 | ||||
1294 | /// If true, then instruction selection should seek to shrink the FP constant | |||
1295 | /// of the specified type to a smaller type in order to save space and / or | |||
1296 | /// reduce runtime. | |||
1297 | virtual bool ShouldShrinkFPConstant(EVT) const { return true; } | |||
1298 | ||||
1299 | /// Return true if it is profitable to reduce a load to a smaller type. | |||
1300 | /// Example: (i16 (trunc (i32 (load x)))) -> i16 load x | |||
1301 | virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, | |||
1302 | EVT NewVT) const { | |||
1303 | // By default, assume that it is cheaper to extract a subvector from a wide | |||
1304 | // vector load rather than creating multiple narrow vector loads. | |||
1305 | if (NewVT.isVector() && !Load->hasOneUse()) | |||
1306 | return false; | |||
1307 | ||||
1308 | return true; | |||
1309 | } | |||
1310 | ||||
1311 | /// When splitting a value of the specified type into parts, does the Lo | |||
1312 | /// or Hi part come first? This usually follows the endianness, except | |||
1313 | /// for ppcf128, where the Hi part always comes first. | |||
1314 | bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const { | |||
1315 | return DL.isBigEndian() || VT == MVT::ppcf128; | |||
1316 | } | |||
1317 | ||||
1318 | /// If true, the target has custom DAG combine transformations that it can | |||
1319 | /// perform for the specified node. | |||
1320 | bool hasTargetDAGCombine(ISD::NodeType NT) const { | |||
1321 | assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray)); | |||
1322 | return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7)); | |||
1323 | } | |||
1324 | ||||
1325 | unsigned getGatherAllAliasesMaxDepth() const { | |||
1326 | return GatherAllAliasesMaxDepth; | |||
1327 | } | |||
1328 | ||||
1329 | /// Returns the size of the platform's va_list object. | |||
1330 | virtual unsigned getVaListSizeInBits(const DataLayout &DL) const { | |||
1331 | return getPointerTy(DL).getSizeInBits(); | |||
1332 | } | |||
1333 | ||||
1334 | /// Get maximum # of store operations permitted for llvm.memset | |||
1335 | /// | |||
1336 | /// This function returns the maximum number of store operations permitted | |||
1337 | /// to replace a call to llvm.memset. The value is set by the target at the | |||
1338 | /// performance threshold for such a replacement. If OptSize is true, | |||
1339 | /// return the limit for functions that have OptSize attribute. | |||
1340 | unsigned getMaxStoresPerMemset(bool OptSize) const { | |||
1341 | return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset; | |||
1342 | } | |||
1343 | ||||
1344 | /// Get maximum # of store operations permitted for llvm.memcpy | |||
1345 | /// | |||
1346 | /// This function returns the maximum number of store operations permitted | |||
1347 | /// to replace a call to llvm.memcpy. The value is set by the target at the | |||
1348 | /// performance threshold for such a replacement. If OptSize is true, | |||
1349 | /// return the limit for functions that have OptSize attribute. | |||
1350 | unsigned getMaxStoresPerMemcpy(bool OptSize) const { | |||
1351 | return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy; | |||
1352 | } | |||
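// Concrete (hypothetical) numbers: with MaxStoresPerMemcpy == 4 and a
// widest legal store of i32, a 16-byte fixed-length memcpy can be inlined
// as four i32 stores, while a 20-byte copy (five stores) remains a call to
// the memcpy library function.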
1353 | ||||
1354 | /// \brief Get maximum # of store operations to be glued together | |||
1355 | /// | |||
1356 | /// This function returns the maximum number of store operations permitted | |||
1357 | /// to glue together during lowering of llvm.memcpy. The value is set by | |||
1358 | /// the target at the performance threshold for such a replacement. | |||
1359 | virtual unsigned getMaxGluedStoresPerMemcpy() const { | |||
1360 | return MaxGluedStoresPerMemcpy; | |||
1361 | } | |||
1362 | ||||
1363 | /// Get maximum # of load operations permitted for memcmp | |||
1364 | /// | |||
1365 | /// This function returns the maximum number of load operations permitted | |||
1366 | /// to replace a call to memcmp. The value is set by the target at the | |||
1367 | /// performance threshold for such a replacement. If OptSize is true, | |||
1368 | /// return the limit for functions that have OptSize attribute. | |||
1369 | unsigned getMaxExpandSizeMemcmp(bool OptSize) const { | |||
1370 | return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp; | |||
1371 | } | |||
1372 | ||||
1373 | /// For memcmp expansion when the memcmp result is only compared equal or | |||
1374 | /// not-equal to 0, allow up to this number of load pairs per block. As an | |||
1375 | /// example, this may allow 'memcmp(a, b, 3) == 0' in a single block: | |||
1376 | /// a0 = load2bytes &a[0] | |||
1377 | /// b0 = load2bytes &b[0] | |||
1378 | /// a2 = load1byte &a[2] | |||
1379 | /// b2 = load1byte &b[2] | |||
1380 | /// r = cmp eq (a0 ^ b0 | a2 ^ b2), 0 | |||
1381 | virtual unsigned getMemcmpEqZeroLoadsPerBlock() const { | |||
1382 | return 1; | |||
1383 | } | |||
1384 | ||||
1385 | /// Get maximum # of store operations permitted for llvm.memmove | |||
1386 | /// | |||
1387 | /// This function returns the maximum number of store operations permitted | |||
1388 | /// to replace a call to llvm.memmove. The value is set by the target at the | |||
1389 | /// performance threshold for such a replacement. If OptSize is true, | |||
1390 | /// return the limit for functions that have OptSize attribute. | |||
1391 | unsigned getMaxStoresPerMemmove(bool OptSize) const { | |||
1392 | return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove; | |||
1393 | } | |||
1394 | ||||
1395 | /// Determine if the target supports unaligned memory accesses. | |||
1396 | /// | |||
1397 | /// This function returns true if the target allows unaligned memory accesses | |||
1398 | /// of the specified type in the given address space. If true, it also returns | |||
1399 | /// whether the unaligned memory access is "fast" in the last argument by | |||
1400 | /// reference. This is used, for example, in situations where an array | |||
1401 | /// copy/move/set is converted to a sequence of store operations. Its use | |||
1402 | /// helps to ensure that such replacements don't generate code that causes an | |||
1403 | /// alignment error (trap) on the target machine. | |||
1404 | virtual bool allowsMisalignedMemoryAccesses(EVT, | |||
1405 | unsigned AddrSpace = 0, | |||
1406 | unsigned Align = 1, | |||
1407 | bool * /*Fast*/ = nullptr) const { | |||
1408 | return false; | |||
1409 | } | |||
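// Sketch of a plausible override (MyTargetLowering is a hypothetical
// backend, not part of LLVM):
//
//   bool MyTargetLowering::allowsMisalignedMemoryAccesses(
//       EVT VT, unsigned AddrSpace, unsigned Align, bool *Fast) const {
//     if (VT != MVT::i32 && VT != MVT::i64)
//       return false;            // only word-sized integer accesses
//     if (Fast)
//       *Fast = Align >= 4;      // allowed, but slow when under-aligned
//     return true;
//   }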
1410 | ||||
1411 | /// Return true if the target supports a memory access of this type for the | |||
1412 | /// given address space and alignment. If the access is allowed, the optional | |||
1413 | /// final parameter returns if the access is also fast (as defined by the | |||
1414 | /// target). | |||
1415 | bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, | |||
1416 | unsigned AddrSpace = 0, unsigned Alignment = 1, | |||
1417 | bool *Fast = nullptr) const; | |||
1418 | ||||
1419 | /// Returns the target specific optimal type for load and store operations as | |||
1420 | /// a result of memset, memcpy, and memmove lowering. | |||
1421 | /// | |||
1422 | /// If DstAlign is zero, it means the destination alignment can satisfy any | |||
1423 | /// constraint. Similarly, if SrcAlign is zero, there is no need to check it | |||
1424 | /// against an alignment requirement, probably because the | |||
1425 | /// source does not need to be loaded. If 'IsMemset' is true, that means it's | |||
1426 | /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of | |||
1427 | /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it | |||
1428 | /// does not need to be loaded. It returns EVT::Other if the type should be | |||
1429 | /// determined using generic target-independent logic. | |||
1430 | virtual EVT | |||
1431 | getOptimalMemOpType(uint64_t /*Size*/, unsigned /*DstAlign*/, | |||
1432 | unsigned /*SrcAlign*/, bool /*IsMemset*/, | |||
1433 | bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/, | |||
1434 | const AttributeList & /*FuncAttributes*/) const { | |||
1435 | return MVT::Other; | |||
1436 | } | |||
1437 | ||||
1438 | /// Returns true if it's safe to use load / store of the specified type to | |||
1439 | /// expand memcpy / memset inline. | |||
1440 | /// | |||
1441 | /// This is mostly true for all types except for some special cases. For | |||
1442 | /// example, on X86 targets without SSE2 f64 load / store are done with fldl / | |||
1443 | /// fstpl which also does type conversion. Note the specified type doesn't | |||
1444 | /// have to be legal as the hook is used before type legalization. | |||
1445 | virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; } | |||
1446 | ||||
1447 | /// Determine if we should use _setjmp or setjmp to implement llvm.setjmp. | |||
1448 | bool usesUnderscoreSetJmp() const { | |||
1449 | return UseUnderscoreSetJmp; | |||
1450 | } | |||
1451 | ||||
1452 | /// Determine if we should use _longjmp or longjmp to implement llvm.longjmp. | |||
1453 | bool usesUnderscoreLongJmp() const { | |||
1454 | return UseUnderscoreLongJmp; | |||
1455 | } | |||
1456 | ||||
1457 | /// Return the lower limit for the number of entries in a jump table. | |||
1458 | virtual unsigned getMinimumJumpTableEntries() const; | |||
1459 | ||||
1460 | /// Return lower limit of the density in a jump table. | |||
1461 | unsigned getMinimumJumpTableDensity(bool OptForSize) const; | |||
1462 | ||||
1463 | /// Return upper limit for number of entries in a jump table. | |||
1464 | /// Zero if no limit. | |||
1465 | unsigned getMaximumJumpTableSize() const; | |||
1466 | ||||
1467 | virtual bool isJumpTableRelative() const { | |||
1468 | return TM.isPositionIndependent(); | |||
1469 | } | |||
1470 | ||||
1471 | /// If a physical register, this specifies the register that | |||
1472 | /// llvm.stacksave/llvm.stackrestore should save and restore. | |||
1473 | unsigned getStackPointerRegisterToSaveRestore() const { | |||
1474 | return StackPointerRegisterToSaveRestore; | |||
1475 | } | |||
1476 | ||||
1477 | /// If a physical register, this returns the register that receives the | |||
1478 | /// exception address on entry to an EH pad. | |||
1479 | virtual unsigned | |||
1480 | getExceptionPointerRegister(const Constant *PersonalityFn) const { | |||
1481 | // 0 is guaranteed to be the NoRegister value on all targets | |||
1482 | return 0; | |||
1483 | } | |||
1484 | ||||
1485 | /// If a physical register, this returns the register that receives the | |||
1486 | /// exception typeid on entry to a landing pad. | |||
1487 | virtual unsigned | |||
1488 | getExceptionSelectorRegister(const Constant *PersonalityFn) const { | |||
1489 | // 0 is guaranteed to be the NoRegister value on all targets | |||
1490 | return 0; | |||
1491 | } | |||
1492 | ||||
1493 | virtual bool needsFixedCatchObjects() const { | |||
1494 | report_fatal_error("Funclet EH is not implemented for this target"); | |||
1495 | } | |||
1496 | ||||
1497 | /// Returns the target's jmp_buf size in bytes (if never set, the default is | |||
1498 | /// 200) | |||
1499 | unsigned getJumpBufSize() const { | |||
1500 | return JumpBufSize; | |||
1501 | } | |||
1502 | ||||
1503 | /// Returns the target's jmp_buf alignment in bytes (if never set, the default | |||
1504 | /// is 0) | |||
1505 | unsigned getJumpBufAlignment() const { | |||
1506 | return JumpBufAlignment; | |||
1507 | } | |||
1508 | ||||
1509 | /// Return the minimum stack alignment of an argument. | |||
1510 | unsigned getMinStackArgumentAlignment() const { | |||
1511 | return MinStackArgumentAlignment; | |||
1512 | } | |||
1513 | ||||
1514 | /// Return the minimum function alignment. | |||
1515 | unsigned getMinFunctionAlignment() const { | |||
1516 | return MinFunctionAlignment; | |||
1517 | } | |||
1518 | ||||
1519 | /// Return the preferred function alignment. | |||
1520 | unsigned getPrefFunctionAlignment() const { | |||
1521 | return PrefFunctionAlignment; | |||
1522 | } | |||
1523 | ||||
1524 | /// Return the preferred loop alignment. | |||
1525 | virtual unsigned getPrefLoopAlignment(MachineLoop *ML = nullptr) const { | |||
1526 | return PrefLoopAlignment; | |||
1527 | } | |||
1528 | ||||
1529 | /// Should loops be aligned even when the function is marked OptSize (but not | |||
1530 | /// MinSize). | |||
1531 | virtual bool alignLoopsWithOptSize() const { | |||
1532 | return false; | |||
1533 | } | |||
1534 | ||||
1535 | /// If the target has a standard location for the stack protector guard, | |||
1536 | /// returns the address of that location. Otherwise, returns nullptr. | |||
1537 | /// DEPRECATED: please override useLoadStackGuardNode and customize | |||
1538 | /// LOAD_STACK_GUARD, or customize \@llvm.stackguard(). | |||
1539 | virtual Value *getIRStackGuard(IRBuilder<> &IRB) const; | |||
1540 | ||||
1541 | /// Inserts necessary declarations for SSP (stack protection) purpose. | |||
1542 | /// Should be used only when getIRStackGuard returns nullptr. | |||
1543 | virtual void insertSSPDeclarations(Module &M) const; | |||
1544 | ||||
1545 | /// Return the variable that's previously inserted by insertSSPDeclarations, | |||
1546 | /// if any, otherwise return nullptr. Should be used only when | |||
1547 | /// getIRStackGuard returns nullptr. | |||
1548 | virtual Value *getSDagStackGuard(const Module &M) const; | |||
1549 | ||||
1550 | /// If this function returns true, stack protection checks should XOR the | |||
1551 | /// frame pointer (or whichever pointer is used to address locals) into the | |||
1552 | /// stack guard value before checking it. getIRStackGuard must return nullptr | |||
1553 | /// if this returns true. | |||
1554 | virtual bool useStackGuardXorFP() const { return false; } | |||
1555 | ||||
1556 | /// If the target has a standard stack protection check function that | |||
1557 | /// performs validation and error handling, returns the function. Otherwise, | |||
1558 | /// returns nullptr. Must be previously inserted by insertSSPDeclarations. | |||
1559 | /// Should be used only when getIRStackGuard returns nullptr. | |||
1560 | virtual Function *getSSPStackGuardCheck(const Module &M) const; | |||
1561 | ||||
1562 | protected: | |||
1563 | Value *getDefaultSafeStackPointerLocation(IRBuilder<> &IRB, | |||
1564 | bool UseTLS) const; | |||
1565 | ||||
1566 | public: | |||
1567 | /// Returns the target-specific address of the unsafe stack pointer. | |||
1568 | virtual Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const; | |||
1569 | ||||
1570 | /// Returns the name of the symbol used to emit stack probes or the empty | |||
1571 | /// string if not applicable. | |||
1572 | virtual StringRef getStackProbeSymbolName(MachineFunction &MF) const { | |||
1573 | return ""; | |||
1574 | } | |||
1575 | ||||
1576 | /// Returns true if a cast between SrcAS and DestAS is a noop. | |||
1577 | virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { | |||
1578 | return false; | |||
1579 | } | |||
1580 | ||||
1581 | /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we | |||
1582 | /// are happy to sink it into basic blocks. | |||
1583 | virtual bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { | |||
1584 | return isNoopAddrSpaceCast(SrcAS, DestAS); | |||
1585 | } | |||
1586 | ||||
1587 | /// Return true if the pointer arguments to CI should be aligned by aligning | |||
1588 | /// the object whose address is being passed. If so then MinSize is set to the | |||
1589 | /// minimum size the object must be to be aligned and PrefAlign is set to the | |||
1590 | /// preferred alignment. | |||
1591 | virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/, | |||
1592 | unsigned & /*PrefAlign*/) const { | |||
1593 | return false; | |||
1594 | } | |||
1595 | ||||
1596 | //===--------------------------------------------------------------------===// | |||
1597 | /// \name Helpers for TargetTransformInfo implementations | |||
1598 | /// @{ | |||
1599 | ||||
1600 | /// Get the ISD node that corresponds to the Instruction class opcode. | |||
1601 | int InstructionOpcodeToISD(unsigned Opcode) const; | |||
1602 | ||||
1603 | /// Estimate the cost of type-legalization and the legalized type. | |||
1604 | std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL, | |||
1605 | Type *Ty) const; | |||
1606 | ||||
1607 | /// @} | |||
1608 | ||||
1609 | //===--------------------------------------------------------------------===// | |||
1610 | /// \name Helpers for atomic expansion. | |||
1611 | /// @{ | |||
1612 | ||||
1613 | /// Returns the maximum atomic operation size (in bits) supported by | |||
1614 | /// the backend. Atomic operations greater than this size (as well | |||
1615 | /// as ones that are not naturally aligned) will be expanded by | |||
1616 | /// AtomicExpandPass into an __atomic_* library call. | |||
1617 | unsigned getMaxAtomicSizeInBitsSupported() const { | |||
1618 | return MaxAtomicSizeInBitsSupported; | |||
1619 | } | |||
1620 | ||||
1621 | /// Returns the size of the smallest cmpxchg or ll/sc instruction | |||
1622 | /// the backend supports. Any smaller operations are widened in | |||
1623 | /// AtomicExpandPass. | |||
1624 | /// | |||
1625 | /// Note that *unlike* operations above the maximum size, atomic ops | |||
1626 | /// are still natively supported below the minimum; they just | |||
1627 | /// require a more complex expansion. | |||
1628 | unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; } | |||
1629 | ||||
1630 | /// Whether the target supports unaligned atomic operations. | |||
1631 | bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; } | |||
1632 | ||||
1633 | /// Whether AtomicExpandPass should automatically insert fences and reduce | |||
1634 | /// ordering for this atomic. This should be true for most architectures with | |||
1635 | /// weak memory ordering. Defaults to false. | |||
1636 | virtual bool shouldInsertFencesForAtomic(const Instruction *I) const { | |||
1637 | return false; | |||
1638 | } | |||
1639 | ||||
1640 | /// Perform a load-linked operation on Addr, returning a "Value *" with the | |||
1641 | /// corresponding pointee type. This may entail some non-trivial operations to | |||
1642 | /// truncate or reconstruct types that will be illegal in the backend. See | |||
1643 | /// ARMISelLowering for an example implementation. | |||
1644 | virtual Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, | |||
1645 | AtomicOrdering Ord) const { | |||
1646 | llvm_unreachable("Load linked unimplemented on this target"); | |||
1647 | } | |||
1648 | ||||
1649 | /// Perform a store-conditional operation to Addr. Return the status of the | |||
1650 | /// store. This should be 0 if the store succeeded, non-zero otherwise. | |||
1651 | virtual Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, | |||
1652 | Value *Addr, AtomicOrdering Ord) const { | |||
1653 | llvm_unreachable("Store conditional unimplemented on this target"); | |||
1654 | } | |||
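// How the two hooks above are used: AtomicExpandPass stitches them into an
// LL/SC retry loop, roughly (simplified IR-level pseudocode):
//
//   loop:
//     %old    = <emitLoadLinked>(%addr)
//     %new    = <apply the RMW operation>(%old, %val)
//     %status = <emitStoreConditional>(%new, %addr)
//     %retry  = icmp ne %status, 0
//     br %retry, label %loop, label %done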
1655 | ||||
1656 | /// Perform a masked atomicrmw using a target-specific intrinsic. This | |||
1657 | /// represents the core LL/SC loop which will be lowered at a late stage by | |||
1658 | /// the backend. | |||
1659 | virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilder<> &Builder, | |||
1660 | AtomicRMWInst *AI, | |||
1661 | Value *AlignedAddr, Value *Incr, | |||
1662 | Value *Mask, Value *ShiftAmt, | |||
1663 | AtomicOrdering Ord) const { | |||
1664 | llvm_unreachable("Masked atomicrmw expansion unimplemented on this target"); | |||
1665 | } | |||
1666 | ||||
1667 | /// Perform a masked cmpxchg using a target-specific intrinsic. This | |||
1668 | /// represents the core LL/SC loop which will be lowered at a late stage by | |||
1669 | /// the backend. | |||
1670 | virtual Value *emitMaskedAtomicCmpXchgIntrinsic( | |||
1671 | IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, | |||
1672 | Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { | |||
1673 | llvm_unreachable("Masked cmpxchg expansion unimplemented on this target"); | |||
1674 | } | |||
1675 | ||||
1676 | /// Inserts in the IR a target-specific intrinsic specifying a fence. | |||
1677 | /// It is called by AtomicExpandPass before expanding an | |||
1678 | /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad | |||
1679 | /// if shouldInsertFencesForAtomic returns true. | |||
1680 | /// | |||
1681 | /// Inst is the original atomic instruction, prior to other expansions that | |||
1682 | /// may be performed. | |||
1683 | /// | |||
1684 | /// This function should either return a nullptr, or a pointer to an IR-level | |||
1685 | /// Instruction*. Even complex fence sequences can be represented by a | |||
1686 | /// single Instruction* through an intrinsic to be lowered later. | |||
1687 | /// Backends should override this method to produce target-specific intrinsic | |||
1688 | /// for their fences. | |||
1689 | /// FIXME: Please note that the default implementation here in terms of | |||
1690 | /// IR-level fences exists for historical/compatibility reasons and is | |||
1691 | /// *unsound*! Fences cannot, in general, be used to restore sequential | |||
1692 | /// consistency. For example, consider the following: | |||
1693 | /// atomic<int> x = y = 0; | |||
1694 | /// int r1, r2, r3, r4; | |||
1695 | /// Thread 0: | |||
1696 | /// x.store(1); | |||
1697 | /// Thread 1: | |||
1698 | /// y.store(1); | |||
1699 | /// Thread 2: | |||
1700 | /// r1 = x.load(); | |||
1701 | /// r2 = y.load(); | |||
1702 | /// Thread 3: | |||
1703 | /// r3 = y.load(); | |||
1704 | /// r4 = x.load(); | |||
1705 | /// r1 = r3 = 1 and r2 = r4 = 0 is impossible as long as the accesses are all | |||
1706 | /// seq_cst. But if they are lowered to monotonic accesses, no amount of | |||
1707 | /// IR-level fences can prevent it. | |||
1708 | /// @{ | |||
1709 | virtual Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, | |||
1710 | AtomicOrdering Ord) const { | |||
1711 | if (isReleaseOrStronger(Ord) && Inst->hasAtomicStore()) | |||
1712 | return Builder.CreateFence(Ord); | |||
1713 | else | |||
1714 | return nullptr; | |||
1715 | } | |||
1716 | ||||
1717 | virtual Instruction *emitTrailingFence(IRBuilder<> &Builder, | |||
1718 | Instruction *Inst, | |||
1719 | AtomicOrdering Ord) const { | |||
1720 | if (isAcquireOrStronger(Ord)) | |||
1721 | return Builder.CreateFence(Ord); | |||
1722 | else | |||
1723 | return nullptr; | |||
1724 | } | |||
1725 | /// @} | |||
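// With shouldInsertFencesForAtomic returning true, the default hooks above
// bracket a seq_cst atomic store roughly as follows:
//
//   fence seq_cst                ; emitLeadingFence: release-or-stronger
//                                ; ordering on an instruction with a store
//   store atomic ..., monotonic  ; ordering reduced by the pass
//   fence seq_cst                ; emitTrailingFence: seq_cst is
//                                ; acquire-or-stronger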
1726 | ||||
1727 | // Emits code that executes when the comparison result in the ll/sc | |||
1728 | // expansion of a cmpxchg instruction is such that the store-conditional will | |||
1729 | // not execute. This makes it possible to balance out the load-linked with | |||
1730 | // a dedicated instruction, if desired. | |||
1731 | // E.g., on ARM, if an ldrex isn't followed by a strex, the exclusive monitor | |||
1732 | // would be held unnecessarily unless the clrex inserted by this hook executes. | |||
1733 | virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const {} | |||
1734 | ||||
1735 | /// Returns true if the given (atomic) store should be expanded by the | |||
1736 | /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input. | |||
1737 | virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const { | |||
1738 | return false; | |||
1739 | } | |||
1740 | ||||
1741 | /// Returns true if arguments should be sign-extended in lib calls. | |||
1742 | virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { | |||
1743 | return IsSigned; | |||
1744 | } | |||
1745 | ||||
1746 | /// Returns how the given (atomic) load should be expanded by the | |||
1747 | /// IR-level AtomicExpand pass. | |||
1748 | virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const { | |||
1749 | return AtomicExpansionKind::None; | |||
1750 | } | |||
1751 | ||||
1752 | /// Returns how the given atomic cmpxchg should be expanded by the IR-level | |||
1753 | /// AtomicExpand pass. | |||
1754 | virtual AtomicExpansionKind | |||
1755 | shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { | |||
1756 | return AtomicExpansionKind::None; | |||
1757 | } | |||
1758 | ||||
1759 | /// Returns how the IR-level AtomicExpand pass should expand the given | |||
1760 | /// AtomicRMW, if at all. Default is to never expand. | |||
1761 | virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { | |||
1762 | return RMW->isFloatingPointOperation() ? | |||
1763 | AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None; | |||
1764 | } | |||
1765 | ||||
1766 | /// On some platforms, an AtomicRMW that never actually modifies the value | |||
1767 | /// (such as fetch_add of 0) can be turned into a fence followed by an | |||
1768 | /// atomic load. This may sound useless, but it makes it possible for the | |||
1769 | /// processor to keep the cacheline shared, dramatically improving | |||
1770 | /// performance. And such idempotent RMWs are useful for implementing some | |||
1771 | /// kinds of locks, see for example (justification + benchmarks): | |||
1772 | /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf | |||
1773 | /// This method tries doing that transformation, returning the atomic load if | |||
1774 | /// it succeeds, and nullptr otherwise. | |||
1775 | /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo | |||
1776 | /// another round of expansion. | |||
1777 | virtual LoadInst * | |||
1778 | lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const { | |||
1779 | return nullptr; | |||
1780 | } | |||
1781 | ||||
1782 | /// Returns how the platform's atomic operations are extended (ZERO_EXTEND, | |||
1783 | /// SIGN_EXTEND, or ANY_EXTEND). | |||
1784 | virtual ISD::NodeType getExtendForAtomicOps() const { | |||
1785 | return ISD::ZERO_EXTEND; | |||
1786 | } | |||
1787 | ||||
1788 | /// @} | |||
1789 | ||||
1790 | /// Returns true if we should normalize | |||
1791 | /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and | |||
1792 | /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely | |||
1793 | /// that it saves us from materializing N0 and N1 in an integer register. | |||
1794 | /// Targets that are able to perform and/or on flags should return false here. | |||
1795 | virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, | |||
1796 | EVT VT) const { | |||
1797 | // If a target has multiple condition registers, then it likely has logical | |||
1798 | // operations on those registers. | |||
1799 | if (hasMultipleConditionRegisters()) | |||
1800 | return false; | |||
1801 | // Only do the transform if the value won't be split into multiple | |||
1802 | // registers. | |||
1803 | LegalizeTypeAction Action = getTypeAction(Context, VT); | |||
1804 | return Action != TypeExpandInteger && Action != TypeExpandFloat && | |||
1805 | Action != TypeSplitVector; | |||
1806 | } | |||
1807 | ||||
1808 | virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; } | |||
1809 | ||||
1810 | /// Return true if a select of constants (select Cond, C1, C2) should be | |||
1811 | /// transformed into simple math ops with the condition value. For example: | |||
1812 | /// select Cond, C1, C1-1 --> add (zext Cond), C1-1 | |||
1813 | virtual bool convertSelectOfConstantsToMath(EVT VT) const { | |||
1814 | return false; | |||
1815 | } | |||
1816 | ||||
1817 | /// Return true if it is profitable to transform an integer | |||
1818 | /// multiplication-by-constant into simpler operations like shifts and adds. | |||
1819 | /// This may be true if the target does not directly support the | |||
1820 | /// multiplication operation for the specified type or the sequence of simpler | |||
1821 | /// ops is faster than the multiply. | |||
1822 | virtual bool decomposeMulByConstant(EVT VT, SDValue C) const { | |||
1823 | return false; | |||
1824 | } | |||
1825 | ||||
1826 | /// Return true if it is more correct/profitable to use strict FP_TO_INT | |||
1827 | /// conversion operations - canonicalizing the FP source value instead of | |||
1828 | /// converting all cases and then selecting based on value. | |||
1829 | /// This may be true if the target throws exceptions for out of bounds | |||
1830 | /// conversions or has fast FP CMOV. | |||
1831 | virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, | |||
1832 | bool IsSigned) const { | |||
1833 | return false; | |||
1834 | } | |||
1835 | ||||
1836 | //===--------------------------------------------------------------------===// | |||
1837 | // TargetLowering Configuration Methods - These methods should be invoked by | |||
1838 | // the derived class constructor to configure this object for the target. | |||
1839 | // | |||
1840 | protected: | |||
1841 | /// Specify how the target extends the result of integer and floating point | |||
1842 | /// boolean values from i1 to a wider type. See getBooleanContents. | |||
1843 | void setBooleanContents(BooleanContent Ty) { | |||
1844 | BooleanContents = Ty; | |||
1845 | BooleanFloatContents = Ty; | |||
1846 | } | |||
1847 | ||||
1848 | /// Specify how the target extends the result of integer and floating point | |||
1849 | /// boolean values from i1 to a wider type. See getBooleanContents. | |||
1850 | void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) { | |||
1851 | BooleanContents = IntTy; | |||
1852 | BooleanFloatContents = FloatTy; | |||
1853 | } | |||
1854 | ||||
1855 | /// Specify how the target extends the result of a vector boolean value from a | |||
1856 | /// vector of i1 to a wider type. See getBooleanContents. | |||
1857 | void setBooleanVectorContents(BooleanContent Ty) { | |||
1858 | BooleanVectorContents = Ty; | |||
1859 | } | |||
1860 | ||||
1861 | /// Specify the target scheduling preference. | |||
1862 | void setSchedulingPreference(Sched::Preference Pref) { | |||
1863 | SchedPreferenceInfo = Pref; | |||
1864 | } | |||
1865 | ||||
1866 | /// Indicate whether this target prefers to use _setjmp to implement | |||
1867 | /// llvm.setjmp or the version without _. Defaults to false. | |||
1868 | void setUseUnderscoreSetJmp(bool Val) { | |||
1869 | UseUnderscoreSetJmp = Val; | |||
1870 | } | |||
1871 | ||||
1872 | /// Indicate whether this target prefers to use _longjmp to implement | |||
1873 | /// llvm.longjmp or the version without _. Defaults to false. | |||
1874 | void setUseUnderscoreLongJmp(bool Val) { | |||
1875 | UseUnderscoreLongJmp = Val; | |||
1876 | } | |||
1877 | ||||
1878 | /// Indicate the minimum number of entries required to generate a jump table. | |||
1879 | void setMinimumJumpTableEntries(unsigned Val); | |||
1880 | ||||
1881 | /// Indicate the maximum number of entries in jump tables. | |||
1882 | /// Set to zero to generate unlimited jump tables. | |||
1883 | void setMaximumJumpTableSize(unsigned); | |||
1884 | ||||
1885 | /// If set to a physical register, this specifies the register that | |||
1886 | /// llvm.stacksave/llvm.stackrestore should save and restore. | |||
1887 | void setStackPointerRegisterToSaveRestore(unsigned R) { | |||
1888 | StackPointerRegisterToSaveRestore = R; | |||
1889 | } | |||
1890 | ||||
1891 | /// Tells the code generator that the target has multiple (allocatable) | |||
1892 | /// condition registers that can be used to store the results of comparisons | |||
1893 | /// for use by selects and conditional branches. With multiple condition | |||
1894 | /// registers, the code generator will not aggressively sink comparisons into | |||
1895 | /// the blocks of their users. | |||
1896 | void setHasMultipleConditionRegisters(bool hasManyRegs = true) { | |||
1897 | HasMultipleConditionRegisters = hasManyRegs; | |||
1898 | } | |||
1899 | ||||
1900 | /// Tells the code generator that the target has BitExtract instructions. | |||
1901 | /// The code generator will aggressively sink "shift"s into the blocks of | |||
1902 | /// their users if the users will generate "and" instructions which can be | |||
1903 | /// combined with "shift" to BitExtract instructions. | |||
1904 | void setHasExtractBitsInsn(bool hasExtractInsn = true) { | |||
1905 | HasExtractBitsInsn = hasExtractInsn; | |||
1906 | } | |||
1907 | ||||
1908 | /// Tells the code generator not to expand logic operations on comparison | |||
1909 | /// predicates into separate sequences that increase the amount of flow | |||
1910 | /// control. | |||
1911 | void setJumpIsExpensive(bool isExpensive = true); | |||
1912 | ||||
1913 | /// Tells the code generator which bitwidths to bypass. | |||
1914 | void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) { | |||
1915 | BypassSlowDivWidths[SlowBitWidth] = FastBitWidth; | |||
1916 | } | |||
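// Illustrative use from a backend constructor: a target with a slow 64-bit
// divider but a fast 32-bit one would call
//
//   addBypassSlowDiv(64, 32);  // emit a run-time check and divide in
//                              // 32 bits when both operands fit
//
// Some x86 subtargets use this pattern for their slow dividers.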
1917 | ||||
1918 | /// Add the specified register class as an available regclass for the | |||
1919 | /// specified value type. This indicates the selector can handle values of | |||
1920 | /// that class natively. | |||
1921 | void addRegisterClass(MVT VT, const TargetRegisterClass *RC) { | |||
1922 | assert((unsigned)VT.SimpleTy < array_lengthof(RegClassForVT)); | |||
1923 | RegClassForVT[VT.SimpleTy] = RC; | |||
1924 | } | |||
1925 | ||||
1926 | /// Return the largest legal super-reg register class of the register class | |||
1927 | /// for the specified type and its associated "cost". | |||
1928 | virtual std::pair<const TargetRegisterClass *, uint8_t> | |||
1929 | findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const; | |||
1930 | ||||
1931 | /// Once all of the register classes are added, this allows us to compute | |||
1932 | /// derived properties we expose. | |||
1933 | void computeRegisterProperties(const TargetRegisterInfo *TRI); | |||
1934 | ||||
1935 | /// Indicate that the specified operation does not work with the specified | |||
1936 | /// type and indicate what to do about it. Note that VT may refer to either | |||
1937 | /// the type of a result or that of an operand of Op. | |||
1938 | void setOperationAction(unsigned Op, MVT VT, | |||
1939 | LegalizeAction Action) { | |||
1940 | assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!"); | |||
1941 | OpActions[(unsigned)VT.SimpleTy][Op] = Action; | |||
1942 | } | |||
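// Illustrative use from a hypothetical backend constructor:
//
//   setOperationAction(ISD::SDIV,   MVT::i32, Expand);  // no divider: expand or libcall
//   setOperationAction(ISD::SELECT, MVT::f64, Custom);  // handled in LowerOperation
//   setOperationAction(ISD::CTPOP,  MVT::i16, Promote); // perform in i32 instead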
1943 | ||||
1944 | /// Indicate that the specified load with extension does not work with the | |||
1945 | /// specified type and indicate what to do about it. | |||
1946 | void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, | |||
1947 | LegalizeAction Action) { | |||
1948 | assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() &&((ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid () && MemVT.isValid() && "Table isn't big enough!" ) ? static_cast<void> (0) : __assert_fail ("ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() && MemVT.isValid() && \"Table isn't big enough!\"" , "/build/llvm-toolchain-snapshot-9~svn359999/include/llvm/CodeGen/TargetLowering.h" , 1949, __PRETTY_FUNCTION__)) | |||
1949 | MemVT.isValid() && "Table isn't big enough!")((ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid () && MemVT.isValid() && "Table isn't big enough!" ) ? static_cast<void> (0) : __assert_fail ("ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() && MemVT.isValid() && \"Table isn't big enough!\"" , "/build/llvm-toolchain-snapshot-9~svn359999/include/llvm/CodeGen/TargetLowering.h" , 1949, __PRETTY_FUNCTION__)); | |||
1950 | assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); | |||
1951 | unsigned Shift = 4 * ExtType; | |||
1952 | LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift); | |||
1953 | LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift; | |||
1954 | } | |||
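// Worked example of the packing above: each load-extension type owns one
// 4-bit nibble of the 16-bit LoadExtActions entry, so an ExtType value of 2
// gives Shift = 8 and the action lands in bits [11:8]. A typical call (the
// type pair is an assumed example) would be:
//
//   setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i16, Expand);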
1955 | ||||
1956 | /// Indicate that the specified truncating store does not work with the | |||
1957 | /// specified type and indicate what to do about it. | |||
1958 | void setTruncStoreAction(MVT ValVT, MVT MemVT, | |||
1959 | LegalizeAction Action) { | |||
1960 | assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!"); | |||
1961 | TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action; | |||
1962 | } | |||
1963 | ||||
1964 | /// Indicate that the specified indexed load does or does not work with the | |||
1965 | /// specified type and indicate what to do about it. | |||
1966 | /// | |||
1967 | /// NOTE: All indexed mode loads are initialized to Expand in | |||
1968 | /// TargetLowering.cpp | |||
1969 | void setIndexedLoadAction(unsigned IdxMode, MVT VT, | |||
1970 | LegalizeAction Action) { | |||
1971 | assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE && | |||
1972 | (unsigned)Action < 0xf && "Table isn't big enough!"); | |||
1973 | // Load actions are kept in the upper half. | |||
1974 | IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0xf0; | |||
1975 | IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action) << 4; | |||
1976 | } | |||
1977 | ||||
1978 | /// Indicate that the specified indexed store does or does not work with the | |||
1979 | /// specified type and indicate what to do about it. | |||
1980 | /// | |||
1981 | /// NOTE: All indexed mode stores are initialized to Expand in | |||
1982 | /// TargetLowering.cpp | |||
1983 | void setIndexedStoreAction(unsigned IdxMode, MVT VT, | |||
1984 | LegalizeAction Action) { | |||
1985 | assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE && | |||
1986 | (unsigned)Action < 0xf && "Table isn't big enough!"); | |||
1987 | // Store actions are kept in the lower half. | |||
1988 | IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0x0f; | |||
1989 | IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action); | |||
1990 | } | |||
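// Note on the packing used by the two setters above: each byte of
// IndexedModeActions holds two 4-bit actions, the indexed-load action in the
// upper nibble and the indexed-store action in the lower nibble, so both can
// be set independently. Hypothetical example:
//
//   setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
//   setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);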
1991 | ||||
1992 | /// Indicate that the specified condition code is or isn't supported on the | |||
1993 | /// target and indicate what to do about it. | |||
1994 | void setCondCodeAction(ISD::CondCode CC, MVT VT, | |||
1995 | LegalizeAction Action) { | |||
1996 | assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) && | |||
1997 | "Table isn't big enough!"); | |||
1998 | assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); | |||
1999 | /// The lower 3 bits of the SimpleTy select the Nth 4-bit group within a | |||
2000 | /// 32-bit value, and the remaining upper bits index into the second | |||
2001 | /// dimension of the array to select which 32-bit value to use. | |||
2002 | uint32_t Shift = 4 * (VT.SimpleTy & 0x7); | |||
2003 | CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift); | |||
2004 | CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift; | |||
2005 | } | |||
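// Worked example of the indexing above, for an assumed SimpleTy value of 13:
// Shift = 4 * (13 & 0x7) = 20, so the action occupies bits [23:20] of
// CondCodeActions[CC][13 >> 3], i.e. of the second 32-bit element.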
2006 | ||||
2007 | /// If Opc/OrigVT is specified as being promoted, the promotion code defaults | |||
2008 | /// to trying a larger integer/fp until it can find one that works. If that | |||
2009 | /// default is insufficient, this method can be used by the target to override | |||
2010 | /// the default. | |||
2011 | void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { | |||
2012 | PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy; | |||
2013 | } | |||
2014 | ||||
2015 | /// Convenience method to set an operation to Promote and specify the type | |||
2016 | /// in a single call. | |||
2017 | void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { | |||
2018 | setOperationAction(Opc, OrigVT, Promote); | |||
2019 | AddPromotedToType(Opc, OrigVT, DestVT); | |||
2020 | } | |||
2021 | ||||
2022 | /// Targets should invoke this method for each target independent node that | |||
2023 | /// they want to provide a custom DAG combiner for by implementing the | |||
2024 | /// PerformDAGCombine virtual method. | |||
2025 | void setTargetDAGCombine(ISD::NodeType NT) { | |||
2026 | assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray)); | |||
2027 | TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7); | |||
2028 | } | |||
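// Usage sketch: a target constructor registers the generic nodes it wants
// PerformDAGCombine callbacks for, e.g.
//
//   setTargetDAGCombine(ISD::ADD);
//   setTargetDAGCombine(ISD::MUL);
//
// Each call sets bit (NT & 7) of byte (NT >> 3) in TargetDAGCombineArray.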
2029 | ||||
2030 | /// Set the target's required jmp_buf buffer size (in bytes); default is 200 | |||
2031 | void setJumpBufSize(unsigned Size) { | |||
2032 | JumpBufSize = Size; | |||
2033 | } | |||
2034 | ||||
2035 | /// Set the target's required jmp_buf buffer alignment (in bytes); default is | |||
2036 | /// 0 | |||
2037 | void setJumpBufAlignment(unsigned Align) { | |||
2038 | JumpBufAlignment = Align; | |||
2039 | } | |||
2040 | ||||
2041 | /// Set the target's minimum function alignment (in log2(bytes)) | |||
2042 | void setMinFunctionAlignment(unsigned Align) { | |||
2043 | MinFunctionAlignment = Align; | |||
2044 | } | |||
2045 | ||||
2046 | /// Set the target's preferred function alignment. This should be set if | |||
2047 | /// there is a performance benefit to higher-than-minimum alignment (in | |||
2048 | /// log2(bytes)) | |||
2049 | void setPrefFunctionAlignment(unsigned Align) { | |||
2050 | PrefFunctionAlignment = Align; | |||
2051 | } | |||
2052 | ||||
2053 | /// Set the target's preferred loop alignment. The default of zero means the | |||
2054 | /// target does not care about loop alignment. The alignment is specified in | |||
2055 | /// log2(bytes). The target may also override | |||
2056 | /// getPrefLoopAlignment to provide per-loop values. | |||
2057 | void setPrefLoopAlignment(unsigned Align) { | |||
2058 | PrefLoopAlignment = Align; | |||
2059 | } | |||
2060 | ||||
2061 | /// Set the minimum stack alignment of an argument (in log2(bytes)). | |||
2062 | void setMinStackArgumentAlignment(unsigned Align) { | |||
2063 | MinStackArgumentAlignment = Align; | |||
2064 | } | |||
2065 | ||||
2066 | /// Set the maximum atomic operation size supported by the | |||
2067 | /// backend. Atomic operations greater than this size (as well as | |||
2068 | /// ones that are not naturally aligned), will be expanded by | |||
2069 | /// AtomicExpandPass into an __atomic_* library call. | |||
2070 | void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) { | |||
2071 | MaxAtomicSizeInBitsSupported = SizeInBits; | |||
2072 | } | |||
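// Usage sketch for a hypothetical 32-bit target with no 64-bit atomics:
//
//   // 64-bit (and wider) atomics become __atomic_* libcalls via
//   // AtomicExpandPass.
//   setMaxAtomicSizeInBitsSupported(32);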
2073 | ||||
2074 | /// Sets the minimum cmpxchg or ll/sc size supported by the backend. | |||
2075 | void setMinCmpXchgSizeInBits(unsigned SizeInBits) { | |||
2076 | MinCmpXchgSizeInBits = SizeInBits; | |||
2077 | } | |||
2078 | ||||
2079 | /// Sets whether unaligned atomic operations are supported. | |||
2080 | void setSupportsUnalignedAtomics(bool UnalignedSupported) { | |||
2081 | SupportsUnalignedAtomics = UnalignedSupported; | |||
2082 | } | |||
2083 | ||||
2084 | public: | |||
2085 | //===--------------------------------------------------------------------===// | |||
2086 | // Addressing mode description hooks (used by LSR etc). | |||
2087 | // | |||
2088 | ||||
2089 | /// CodeGenPrepare sinks address calculations into the same BB as Load/Store | |||
2090 | /// instructions reading the address. This allows as much computation as | |||
2091 | /// possible to be done in the address mode for that operand. This hook also | |||
2092 | /// lets targets indicate when this should be done for intrinsics that | |||
2093 | /// load/store. | |||
2094 | virtual bool getAddrModeArguments(IntrinsicInst * /*I*/, | |||
2095 | SmallVectorImpl<Value*> &/*Ops*/, | |||
2096 | Type *&/*AccessTy*/) const { | |||
2097 | return false; | |||
2098 | } | |||
2099 | ||||
2100 | /// This represents an addressing mode of: | |||
2101 | /// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg | |||
2102 | /// If BaseGV is null, there is no BaseGV. | |||
2103 | /// If BaseOffs is zero, there is no base offset. | |||
2104 | /// If HasBaseReg is false, there is no base register. | |||
2105 | /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with | |||
2106 | /// no scale. | |||
2107 | struct AddrMode { | |||
2108 | GlobalValue *BaseGV = nullptr; | |||
2109 | int64_t BaseOffs = 0; | |||
2110 | bool HasBaseReg = false; | |||
2111 | int64_t Scale = 0; | |||
2112 | AddrMode() = default; | |||
2113 | }; | |||
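// Reading of the encoding above, as an example: the address
// "GV + 24 + %base + 4 * %index" is described by
// { BaseGV = GV, BaseOffs = 24, HasBaseReg = true, Scale = 4 };
// the actual register values themselves are not part of AddrMode.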
2114 | ||||
2115 | /// Return true if the addressing mode represented by AM is legal for this | |||
2116 | /// target, for a load/store of the specified type. | |||
2117 | /// | |||
2118 | /// The type may be VoidTy, in which case only return true if the addressing | |||
2119 | /// mode is legal for a load/store of any legal type. TODO: Handle | |||
2120 | /// pre/postinc as well. | |||
2121 | /// | |||
2122 | /// If the address space cannot be determined, it will be -1. | |||
2123 | /// | |||
2124 | /// TODO: Remove default argument | |||
2125 | virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, | |||
2126 | Type *Ty, unsigned AddrSpace, | |||
2127 | Instruction *I = nullptr) const; | |||
2128 | ||||
2129 | /// Return the cost of the scaling factor used in the addressing mode | |||
2130 | /// represented by AM for this target, for a load/store of the specified type. | |||
2131 | /// | |||
2132 | /// If the AM is supported, the return value must be >= 0. | |||
2133 | /// If the AM is not supported, it returns a negative value. | |||
2134 | /// TODO: Handle pre/postinc as well. | |||
2135 | /// TODO: Remove default argument | |||
2136 | virtual int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, | |||
2137 | Type *Ty, unsigned AS = 0) const { | |||
2138 | // Default: assume that any scaling factor used in a legal AM is free. | |||
2139 | if (isLegalAddressingMode(DL, AM, Ty, AS)) | |||
2140 | return 0; | |||
2141 | return -1; | |||
2142 | } | |||
2143 | ||||
2144 | /// Return true if the specified immediate is a legal icmp immediate, that is, | |||
2145 | /// the target has icmp instructions which can compare a register against the | |||
2146 | /// immediate without having to materialize the immediate into a register. | |||
2147 | virtual bool isLegalICmpImmediate(int64_t) const { | |||
2148 | return true; | |||
2149 | } | |||
2150 | ||||
2151 | /// Return true if the specified immediate is a legal add immediate, that is, | |||
2152 | /// the target has add instructions which can add a register with the immediate | |||
2153 | /// without having to materialize the immediate into a register. | |||
2154 | virtual bool isLegalAddImmediate(int64_t) const { | |||
2155 | return true; | |||
2156 | } | |||
2157 | ||||
2158 | /// Return true if the specified immediate is legal for the value input of a | |||
2159 | /// store instruction. | |||
2160 | virtual bool isLegalStoreImmediate(int64_t Value) const { | |||
2161 | // Default implementation assumes that at least 0 works since it is likely | |||
2162 | // that a zero register exists or a zero immediate is allowed. | |||
2163 | return Value == 0; | |||
2164 | } | |||
2165 | ||||
2166 | /// Return true if it's significantly cheaper to shift a vector by a uniform | |||
2167 | /// scalar than by an amount which will vary across each lane. On x86, for | |||
2168 | /// example, there is a "psllw" instruction for the former case, but no simple | |||
2169 | /// instruction for a general "a << b" operation on vectors. | |||
2170 | virtual bool isVectorShiftByScalarCheap(Type *Ty) const { | |||
2171 | return false; | |||
2172 | } | |||
2173 | ||||
2174 | /// Returns true if the opcode is a commutative binary operation. | |||
2175 | virtual bool isCommutativeBinOp(unsigned Opcode) const { | |||
2176 | // FIXME: This should get its info from the td file. | |||
2177 | switch (Opcode) { | |||
2178 | case ISD::ADD: | |||
2179 | case ISD::SMIN: | |||
2180 | case ISD::SMAX: | |||
2181 | case ISD::UMIN: | |||
2182 | case ISD::UMAX: | |||
2183 | case ISD::MUL: | |||
2184 | case ISD::MULHU: | |||
2185 | case ISD::MULHS: | |||
2186 | case ISD::SMUL_LOHI: | |||
2187 | case ISD::UMUL_LOHI: | |||
2188 | case ISD::FADD: | |||
2189 | case ISD::FMUL: | |||
2190 | case ISD::AND: | |||
2191 | case ISD::OR: | |||
2192 | case ISD::XOR: | |||
2193 | case ISD::SADDO: | |||
2194 | case ISD::UADDO: | |||
2195 | case ISD::ADDC: | |||
2196 | case ISD::ADDE: | |||
2197 | case ISD::SADDSAT: | |||
2198 | case ISD::UADDSAT: | |||
2199 | case ISD::FMINNUM: | |||
2200 | case ISD::FMAXNUM: | |||
2201 | case ISD::FMINIMUM: | |||
2202 | case ISD::FMAXIMUM: | |||
2203 | return true; | |||
2204 | default: return false; | |||
2205 | } | |||
2206 | } | |||
2207 | ||||
2208 | /// Return true if it's free to truncate a value of type FromTy to type | |||
2209 | /// ToTy. e.g. On x86 it's free to truncate an i32 value in register EAX to i16 | |||
2210 | /// by referencing its sub-register AX. | |||
2211 | /// Targets must return false when FromTy <= ToTy. | |||
2212 | virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const { | |||
2213 | return false; | |||
2214 | } | |||
2215 | ||||
2216 | /// Return true if a truncation from FromTy to ToTy is permitted when deciding | |||
2217 | /// whether a call is in tail position. Typically this means that both results | |||
2218 | /// would be assigned to the same register or stack slot, but it could mean | |||
2219 | /// the target performs adequate checks of its own before proceeding with the | |||
2220 | /// tail call. Targets must return false when FromTy <= ToTy. | |||
2221 | virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const { | |||
2222 | return false; | |||
2223 | } | |||
2224 | ||||
2225 | virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const { | |||
2226 | return false; | |||
2227 | } | |||
2228 | ||||
2229 | virtual bool isProfitableToHoist(Instruction *I) const { return true; } | |||
2230 | ||||
2231 | /// Return true if the extension represented by \p I is free. | |||
2232 | /// Unlike the is[Z|FP]ExtFree family, which is based on types, | |||
2233 | /// this method can use the context provided by \p I to decide | |||
2234 | /// whether or not \p I is free. | |||
2235 | /// This method extends the behavior of the is[Z|FP]ExtFree family. | |||
2236 | /// In other words, if is[Z|FP]ExtFree returns true, then this method | |||
2237 | /// returns true as well. The converse is not true. | |||
2238 | /// The target can perform the adequate checks by overriding isExtFreeImpl. | |||
2239 | /// \pre \p I must be a sign, zero, or fp extension. | |||
2240 | bool isExtFree(const Instruction *I) const { | |||
2241 | switch (I->getOpcode()) { | |||
2242 | case Instruction::FPExt: | |||
2243 | if (isFPExtFree(EVT::getEVT(I->getType()), | |||
2244 | EVT::getEVT(I->getOperand(0)->getType()))) | |||
2245 | return true; | |||
2246 | break; | |||
2247 | case Instruction::ZExt: | |||
2248 | if (isZExtFree(I->getOperand(0)->getType(), I->getType())) | |||
2249 | return true; | |||
2250 | break; | |||
2251 | case Instruction::SExt: | |||
2252 | break; | |||
2253 | default: | |||
2254 | llvm_unreachable("Instruction is not an extension")::llvm::llvm_unreachable_internal("Instruction is not an extension" , "/build/llvm-toolchain-snapshot-9~svn359999/include/llvm/CodeGen/TargetLowering.h" , 2254); | |||
2255 | } | |||
2256 | return isExtFreeImpl(I); | |||
2257 | } | |||
2258 | ||||
2259 | /// Return true if \p Load and \p Ext can form an ExtLoad. | |||
2260 | /// For example, in AArch64 | |||
2261 | /// %L = load i8, i8* %ptr | |||
2262 | /// %E = zext i8 %L to i32 | |||
2263 | /// can be lowered into one load instruction | |||
2264 | /// ldrb w0, [x0] | |||
2265 | bool isExtLoad(const LoadInst *Load, const Instruction *Ext, | |||
2266 | const DataLayout &DL) const { | |||
2267 | EVT VT = getValueType(DL, Ext->getType()); | |||
2268 | EVT LoadVT = getValueType(DL, Load->getType()); | |||
2269 | ||||
2270 | // If the load has other users and the truncate is not free, the ext | |||
2271 | // probably isn't free. | |||
2272 | if (!Load->hasOneUse() && (isTypeLegal(LoadVT) || !isTypeLegal(VT)) && | |||
2273 | !isTruncateFree(Ext->getType(), Load->getType())) | |||
2274 | return false; | |||
2275 | ||||
2276 | // Check whether the target supports casts folded into loads. | |||
2277 | unsigned LType; | |||
2278 | if (isa<ZExtInst>(Ext)) | |||
2279 | LType = ISD::ZEXTLOAD; | |||
2280 | else { | |||
2281 | assert(isa<SExtInst>(Ext) && "Unexpected ext type!"); | |||
2282 | LType = ISD::SEXTLOAD; | |||
2283 | } | |||
2284 | ||||
2285 | return isLoadExtLegal(LType, VT, LoadVT); | |||
2286 | } | |||
2287 | ||||
2288 | /// Return true if any actual instruction that defines a value of type FromTy | |||
2289 | /// implicitly zero-extends the value to ToTy in the result register. | |||
2290 | /// | |||
2291 | /// The function should return true when it is likely that the truncate can | |||
2292 | /// be freely folded with an instruction defining a value of FromTy. If | |||
2293 | /// the defining instruction is unknown (because you're looking at a | |||
2294 | /// function argument, PHI, etc.) then the target may require an | |||
2295 | /// explicit truncate, which is not necessarily free, but this function | |||
2296 | /// does not deal with those cases. | |||
2297 | /// Targets must return false when FromTy >= ToTy. | |||
2298 | virtual bool isZExtFree(Type *FromTy, Type *ToTy) const { | |||
2299 | return false; | |||
2300 | } | |||
2301 | ||||
2302 | virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { | |||
2303 | return false; | |||
2304 | } | |||
2305 | ||||
2306 | /// Return true if sign-extension from FromTy to ToTy is cheaper than | |||
2307 | /// zero-extension. | |||
2308 | virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const { | |||
2309 | return false; | |||
2310 | } | |||
2311 | ||||
2312 | /// Return true if sinking I's operands to the same basic block as I is | |||
2313 | /// profitable, e.g. because the operands can be folded into a target | |||
2314 | /// instruction during instruction selection. After calling the function | |||
2315 | /// \p Ops contains the Uses to sink ordered by dominance (dominating users | |||
2316 | /// come first). | |||
2317 | virtual bool shouldSinkOperands(Instruction *I, | |||
2318 | SmallVectorImpl<Use *> &Ops) const { | |||
2319 | return false; | |||
2320 | } | |||
2321 | ||||
2322 | /// Return true if the target supports combining two loaded values of type | |||
2323 | /// LoadedType, laid out next to each other in memory, into a paired load. | |||
2324 | /// RequiredAlignment gives the minimal alignment constraints that must be met | |||
2325 | /// to be able to select this paired load. | |||
2326 | /// | |||
2327 | /// This information is *not* used to generate actual paired loads, but it is | |||
2328 | /// used to generate a sequence of loads that is easier to combine into a | |||
2329 | /// paired load. | |||
2330 | /// For instance, something like this: | |||
2331 | /// a = load i64* addr | |||
2332 | /// b = trunc i64 a to i32 | |||
2333 | /// c = lshr i64 a, 32 | |||
2334 | /// d = trunc i64 c to i32 | |||
2335 | /// will be optimized into: | |||
2336 | /// b = load i32* addr1 | |||
2337 | /// d = load i32* addr2 | |||
2338 | /// Where addr1 = addr2 +/- sizeof(i32). | |||
2339 | /// | |||
2340 | /// In other words, unless the target performs a post-isel load combining, | |||
2341 | /// this information should not be provided because it will generate more | |||
2342 | /// loads. | |||
2343 | virtual bool hasPairedLoad(EVT /*LoadedType*/, | |||
2344 | unsigned & /*RequiredAlignment*/) const { | |||
2345 | return false; | |||
2346 | } | |||
2347 | ||||
2348 | /// Return true if the target has a vector blend instruction. | |||
2349 | virtual bool hasVectorBlend() const { return false; } | |||
2350 | ||||
2351 | /// Get the maximum supported factor for interleaved memory accesses. | |||
2352 | /// Default to be the minimum interleave factor: 2. | |||
2353 | virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; } | |||
2354 | ||||
2355 | /// Lower an interleaved load to target specific intrinsics. Return | |||
2356 | /// true on success. | |||
2357 | /// | |||
2358 | /// \p LI is the vector load instruction. | |||
2359 | /// \p Shuffles is the shufflevector list to DE-interleave the loaded vector. | |||
2360 | /// \p Indices is the corresponding indices for each shufflevector. | |||
2361 | /// \p Factor is the interleave factor. | |||
2362 | virtual bool lowerInterleavedLoad(LoadInst *LI, | |||
2363 | ArrayRef<ShuffleVectorInst *> Shuffles, | |||
2364 | ArrayRef<unsigned> Indices, | |||
2365 | unsigned Factor) const { | |||
2366 | return false; | |||
2367 | } | |||
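// For context, a factor-2 interleaved load as presented to this hook looks
// like the following IR sketch:
//
//   %wide = load <8 x i32>, <8 x i32>* %ptr
//   %even = shufflevector <8 x i32> %wide, <8 x i32> undef,
//                         <4 x i32> <i32 0, i32 2, i32 4, i32 6>
//   %odd  = shufflevector <8 x i32> %wide, <8 x i32> undef,
//                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
//
// Here Shuffles = {%even, %odd}, Indices = {0, 1}, and Factor = 2.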
2368 | ||||
2369 | /// Lower an interleaved store to target specific intrinsics. Return | |||
2370 | /// true on success. | |||
2371 | /// | |||
2372 | /// \p SI is the vector store instruction. | |||
2373 | /// \p SVI is the shufflevector to RE-interleave the stored vector. | |||
2374 | /// \p Factor is the interleave factor. | |||
2375 | virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, | |||
2376 | unsigned Factor) const { | |||
2377 | return false; | |||
2378 | } | |||
2379 | ||||
2380 | /// Return true if zero-extending the specific node Val to type VT2 is free | |||
2381 | /// (either because it's implicitly zero-extended such as ARM ldrb / ldrh or | |||
2382 | /// because it's folded such as X86 zero-extending loads). | |||
2383 | virtual bool isZExtFree(SDValue Val, EVT VT2) const { | |||
2384 | return isZExtFree(Val.getValueType(), VT2); | |||
2385 | } | |||
2386 | ||||
2387 | /// Return true if an fpext operation is free (for instance, because | |||
2388 | /// single-precision floating-point numbers are implicitly extended to | |||
2389 | /// double-precision). | |||
2390 | virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const { | |||
2391 | assert(SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() && | |||
2392 | "invalid fpext types"); | |||
2393 | return false; | |||
2394 | } | |||
2395 | ||||
2396 | /// Return true if an fpext operation that feeds an \p Opcode operation (such | |||
2397 | /// as an FMA) is free (for instance, because half-precision floating-point | |||
2398 | /// numbers are implicitly extended to single precision). | |||
2399 | virtual bool isFPExtFoldable(unsigned Opcode, EVT DestVT, EVT SrcVT) const { | |||
2400 | assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() && | |||
2401 | "invalid fpext types"); | |||
2402 | return isFPExtFree(DestVT, SrcVT); | |||
2403 | } | |||
2404 | ||||
2405 | /// Return true if folding a vector load into ExtVal (a sign, zero, or any | |||
2406 | /// extend node) is profitable. | |||
2407 | virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const { return false; } | |||
2408 | ||||
2409 | /// Return true if an fneg operation is free to the point where it is never | |||
2410 | /// worthwhile to replace it with a bitwise operation. | |||
2411 | virtual bool isFNegFree(EVT VT) const { | |||
2412 | assert(VT.isFloatingPoint()); | |||
2413 | return false; | |||
2414 | } | |||
2415 | ||||
2416 | /// Return true if an fabs operation is free to the point where it is never | |||
2417 | /// worthwhile to replace it with a bitwise operation. | |||
2418 | virtual bool isFAbsFree(EVT VT) const { | |||
2419 | assert(VT.isFloatingPoint()); | |||
2420 | return false; | |||
2421 | } | |||
2422 | ||||
2423 | /// Return true if an FMA operation is faster than a pair of fmul and fadd | |||
2424 | /// instructions. fmuladd intrinsics will be expanded to FMAs when this method | |||
2425 | /// returns true, otherwise fmuladd is expanded to fmul + fadd. | |||
2426 | /// | |||
2427 | /// NOTE: This may be called before legalization on types for which FMAs are | |||
2428 | /// not legal, but should return true if those types will eventually legalize | |||
2429 | /// to types that support FMAs. After legalization, it will only be called on | |||
2430 | /// types that support FMAs (via Legal or Custom actions) | |||
2431 | virtual bool isFMAFasterThanFMulAndFAdd(EVT) const { | |||
2432 | return false; | |||
2433 | } | |||
2434 | ||||
2435 | /// Return true if it's profitable to narrow operations of type VT1 to | |||
2436 | /// VT2. e.g. on x86, it's profitable to narrow from i32 to i8 but not from | |||
2437 | /// i32 to i16. | |||
2438 | virtual bool isNarrowingProfitable(EVT /*VT1*/, EVT /*VT2*/) const { | |||
2439 | return false; | |||
2440 | } | |||
2441 | ||||
2442 | /// Return true if it is beneficial to convert a load of a constant to | |||
2443 | /// just the constant itself. | |||
2444 | /// On some targets it might be more efficient to use a combination of | |||
2445 | /// arithmetic instructions to materialize the constant instead of loading it | |||
2446 | /// from a constant pool. | |||
2447 | virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm, | |||
2448 | Type *Ty) const { | |||
2449 | return false; | |||
2450 | } | |||
2451 | ||||
2452 | /// Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type | |||
2453 | /// from this source type with this index. This is needed because | |||
2454 | /// EXTRACT_SUBVECTOR usually has custom lowering that depends on the index of | |||
2455 | /// the first element, and only the target knows which lowering is cheap. | |||
2456 | virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, | |||
2457 | unsigned Index) const { | |||
2458 | return false; | |||
2459 | } | |||
2460 | ||||
2461 | /// Try to convert an extract element of a vector binary operation into an | |||
2462 | /// extract element followed by a scalar operation. | |||
2463 | virtual bool shouldScalarizeBinop(SDValue VecOp) const { | |||
2464 | return false; | |||
2465 | } | |||
2466 | ||||
2467 | /// Return true if extraction of a scalar element from the given vector type | |||
2468 | /// at the given index is cheap. For example, if scalar operations occur on | |||
2469 | /// the same register file as vector operations, then an extract element may | |||
2470 | /// be a sub-register rename rather than an actual instruction. | |||
2471 | virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const { | |||
2472 | return false; | |||
2473 | } | |||
2474 | ||||
2475 | /// Try to convert math with an overflow comparison into the corresponding DAG | |||
2476 | /// node operation. Targets may want to override this independently of whether | |||
2477 | /// the operation is legal/custom for the given type because it may obscure | |||
2478 | /// matching of other patterns. | |||
2479 | virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const { | |||
2480 | // TODO: The default logic is inherited from code in CodeGenPrepare. | |||
2481 | // The opcode should not make a difference by default? | |||
2482 | if (Opcode != ISD::UADDO) | |||
2483 | return false; | |||
2484 | ||||
2485 | // Allow the transform as long as we have an integer type that is not | |||
2486 | // obviously illegal and unsupported. | |||
2487 | if (VT.isVector()) | |||
2488 | return false; | |||
2489 | return VT.isSimple() || !isOperationExpand(Opcode, VT); | |||
2490 | } | |||
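// For reference, the IR shape this hook gates (as matched in CodeGenPrepare)
// is an add feeding an unsigned-wrap check:
//
//   %add = add i32 %x, %y
//   %cmp = icmp ult i32 %add, %x   ; overflow iff the sum wrapped
//
// which can be folded into a single ISD::UADDO node producing both the sum
// and the overflow bit.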
2491 | ||||
2492 | // Return true if it is profitable to use a scalar input to a BUILD_VECTOR | |||
2493 | // even if the vector itself has multiple uses. | |||
2494 | virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const { | |||
2495 | return false; | |||
2496 | } | |||
2497 | ||||
2498 | // Return true if CodeGenPrepare should consider splitting large offsets of | |||
2499 | // GEPs so that the GEP fits into the addressing mode and can be sunk into | |||
2500 | // the same blocks as its users. | |||
2501 | virtual bool shouldConsiderGEPOffsetSplit() const { return false; } | |||
2502 | ||||
2503 | //===--------------------------------------------------------------------===// | |||
2504 | // Runtime Library hooks | |||
2505 | // | |||
2506 | ||||
2507 | /// Rename the default libcall routine name for the specified libcall. | |||
2508 | void setLibcallName(RTLIB::Libcall Call, const char *Name) { | |||
2509 | LibcallRoutineNames[Call] = Name; | |||
2510 | } | |||
2511 | ||||
2512 | /// Get the libcall routine name for the specified libcall. | |||
2513 | const char *getLibcallName(RTLIB::Libcall Call) const { | |||
2514 | return LibcallRoutineNames[Call]; | |||
2515 | } | |||
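// Usage sketch, with an assumed runtime symbol name:
//
//   setLibcallName(RTLIB::MEMCPY, "__mylib_memcpy");
//   const char *N = getLibcallName(RTLIB::MEMCPY);  // "__mylib_memcpy"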
2516 | ||||
2517 | /// Override the default CondCode to be used to test the result of the | |||
2518 | /// comparison libcall against zero. | |||
2519 | void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) { | |||
2520 | CmpLibcallCCs[Call] = CC; | |||
2521 | } | |||
2522 | ||||
2523 | /// Get the CondCode that's to be used to test the result of the comparison | |||
2524 | /// libcall against zero. | |||
2525 | ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const { | |||
2526 | return CmpLibcallCCs[Call]; | |||
2527 | } | |||
2528 | ||||
2529 | /// Set the CallingConv that should be used for the specified libcall. | |||
2530 | void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) { | |||
2531 | LibcallCallingConvs[Call] = CC; | |||
2532 | } | |||
2533 | ||||
2534 | /// Get the CallingConv that should be used for the specified libcall. | |||
2535 | CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const { | |||
2536 | return LibcallCallingConvs[Call]; | |||
2537 | } | |||
2538 | ||||
2539 | /// Execute target specific actions to finalize target lowering. | |||
2540 | /// This is used to set extra flags in MachineFrameInfo and to freeze the | |||
2541 | /// set of reserved registers. | |||
2542 | /// The default implementation just freezes the set of reserved registers. | |||
2543 | virtual void finalizeLowering(MachineFunction &MF) const; | |||
2544 | ||||
2545 | private: | |||
2546 | const TargetMachine &TM; | |||
2547 | ||||
2548 | /// Tells the code generator that the target has multiple (allocatable) | |||
2549 | /// condition registers that can be used to store the results of comparisons | |||
2550 | /// for use by selects and conditional branches. With multiple condition | |||
2551 | /// registers, the code generator will not aggressively sink comparisons into | |||
2552 | /// the blocks of their users. | |||
2553 | bool HasMultipleConditionRegisters; | |||
2554 | ||||
2555 | /// Tells the code generator that the target has BitExtract instructions. | |||
2556 | /// The code generator will aggressively sink "shift"s into the blocks of | |||
2557 | /// their users if the users will generate "and" instructions which can be | |||
2558 | /// combined with "shift" to BitExtract instructions. | |||
2559 | bool HasExtractBitsInsn; | |||
2560 | ||||
2561 | /// Tells the code generator to bypass slow divide or remainder | |||
2562 | /// instructions. For example, BypassSlowDivWidths[32,8] tells the code | |||
2563 | /// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer | |||
2564 | /// div/rem when the operands are positive and less than 256. | |||
2565 | DenseMap <unsigned int, unsigned int> BypassSlowDivWidths; | |||
2566 | ||||
2567 | /// Tells the code generator that it shouldn't generate extra flow control | |||
2568 | /// instructions and should attempt to combine flow control instructions via | |||
2569 | /// predication. | |||
2570 | bool JumpIsExpensive; | |||
2571 | ||||
2572 | /// This target prefers to use _setjmp to implement llvm.setjmp. | |||
2573 | /// | |||
2574 | /// Defaults to false. | |||
2575 | bool UseUnderscoreSetJmp; | |||
2576 | ||||
2577 | /// This target prefers to use _longjmp to implement llvm.longjmp. | |||
2578 | /// | |||
2579 | /// Defaults to false. | |||
2580 | bool UseUnderscoreLongJmp; | |||
2581 | ||||
2582 | /// Information about the contents of the high-bits in boolean values held in | |||
2583 | /// a type wider than i1. See getBooleanContents. | |||
2584 | BooleanContent BooleanContents; | |||
2585 | ||||
2587 | /// Information about the contents of the high-bits in boolean values held in | |||
2588 | /// a floating-point type wider than i1. See getBooleanContents. | |||
2588 | BooleanContent BooleanFloatContents; | |||
2589 | ||||
2590 | /// Information about the contents of the high-bits in boolean vector values | |||
2591 | /// when the element type is wider than i1. See getBooleanContents. | |||
2592 | BooleanContent BooleanVectorContents; | |||
2593 | ||||
2594 | /// The target scheduling preference: shortest possible total cycles or lowest | |||
2595 | /// register usage. | |||
2596 | Sched::Preference SchedPreferenceInfo; | |||
2597 | ||||
2598 | /// The size, in bytes, of the target's jmp_buf buffers | |||
2599 | unsigned JumpBufSize; | |||
2600 | ||||
2601 | /// The alignment, in bytes, of the target's jmp_buf buffers | |||
2602 | unsigned JumpBufAlignment; | |||
2603 | ||||
2604 | /// The minimum alignment that any argument on the stack needs to have. | |||
2605 | unsigned MinStackArgumentAlignment; | |||
2606 | ||||
2607 | /// The minimum function alignment (used when optimizing for size, and to | |||
2608 | /// prevent explicitly provided alignment from leading to incorrect code). | |||
2609 | unsigned MinFunctionAlignment; | |||
2610 | ||||
2611 | /// The preferred function alignment (used when alignment unspecified and | |||
2612 | /// optimizing for speed). | |||
2613 | unsigned PrefFunctionAlignment; | |||
2614 | ||||
2615 | /// The preferred loop alignment. | |||
2616 | unsigned PrefLoopAlignment; | |||
2617 | ||||
2618 | /// Size in bits of the maximum atomics size the backend supports. | |||
2619 | /// Accesses larger than this will be expanded by AtomicExpandPass. | |||
2620 | unsigned MaxAtomicSizeInBitsSupported; | |||
2621 | ||||
2622 | /// Size in bits of the minimum cmpxchg or ll/sc operation the | |||
2623 | /// backend supports. | |||
2624 | unsigned MinCmpXchgSizeInBits; | |||
2625 | ||||
2626 | /// This indicates if the target supports unaligned atomic operations. | |||
2627 | bool SupportsUnalignedAtomics; | |||
2628 | ||||
2629 | /// If set to a physical register, this specifies the register that | |||
2630 | /// llvm.stacksave/llvm.stackrestore should save and restore. | |||
2631 | unsigned StackPointerRegisterToSaveRestore; | |||
2632 | ||||
2633 | /// This indicates the default register class to use for each ValueType the | |||
2634 | /// target supports natively. | |||
2635 | const TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE]; | |||
2636 | unsigned char NumRegistersForVT[MVT::LAST_VALUETYPE]; | |||
2637 | MVT RegisterTypeForVT[MVT::LAST_VALUETYPE]; | |||
2638 | ||||
2639 | /// This indicates the "representative" register class to use for each | |||
2640 | /// ValueType the target supports natively. This information is used by the | |||
2641 | /// scheduler to track register pressure. By default, the representative | |||
2642 | /// register class is the largest legal super-reg register class of the | |||
2643 | /// register class of the specified type. e.g. On x86, i8, i16, and i32's | |||
2644 | /// representative class would be GR32. | |||
2645 | const TargetRegisterClass *RepRegClassForVT[MVT::LAST_VALUETYPE]; | |||
2646 | ||||
2647 | /// This indicates the "cost" of the "representative" register class for each | |||
2648 | /// ValueType. The cost is used by the scheduler to approximate register | |||
2649 | /// pressure. | |||
2650 | uint8_t RepRegClassCostForVT[MVT::LAST_VALUETYPE]; | |||
2651 | ||||
2652 | /// For any value types we are promoting or expanding, this contains the value | |||
2653 | /// type that we are changing to. For Expanded types, this contains one step | |||
2654 | /// of the expand (e.g. i64 -> i32), even if there are multiple steps required | |||
2655 | /// (e.g. i64 -> i16). For types natively supported by the system, this holds | |||
2656 | /// the same type (e.g. i32 -> i32). | |||
2657 | MVT TransformToType[MVT::LAST_VALUETYPE]; | |||
2658 | ||||
2659 | /// For each operation and each value type, keep a LegalizeAction that | |||
2660 | /// indicates how instruction selection should deal with the operation. Most | |||
2661 | /// operations are Legal (aka, supported natively by the target), but | |||
2662 | /// operations that are not should be described. Note that operations on | |||
2663 | /// non-legal value types are not described here. | |||
2664 | LegalizeAction OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END]; | |||
2665 | ||||
2666 | /// For each load extension type and each value type, keep a LegalizeAction | |||
2667 | /// that indicates how instruction selection should deal with a load of a | |||
2668 | /// specific value type and extension type. Uses 4-bits to store the action | |||
2669 | /// for each of the 4 load ext types. | |||
2670 | uint16_t LoadExtActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]; | |||
2671 | ||||
2672 | /// For each value type pair keep a LegalizeAction that indicates whether a | |||
2673 | /// truncating store of a specific value type and truncating type is legal. | |||
2674 | LegalizeAction TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]; | |||
2675 | ||||
2676 | /// For each indexed mode and each value type, keep a pair of LegalizeActions | |||
2677 | /// that indicate how instruction selection should deal with the load / | |||
2678 | /// store. | |||
2679 | /// | |||
2680 | /// The first dimension is the value_type for the reference. The second | |||
2681 | /// dimension represents the various modes for load / store. | |||
2682 | uint8_t IndexedModeActions[MVT::LAST_VALUETYPE][ISD::LAST_INDEXED_MODE]; | |||
2683 | ||||
2684 | /// For each condition code (ISD::CondCode) keep a LegalizeAction that | |||
2685 | /// indicates how instruction selection should deal with the condition code. | |||
2686 | /// | |||
2687 | /// Because each CC action takes up 4 bits, we need to have the array size be | |||
2688 | /// large enough to fit all of the value types. This can be done by rounding | |||
2689 | /// up the MVT::LAST_VALUETYPE value to the next multiple of 8. | |||
2690 | uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 7) / 8]; | |||
2691 | ||||
2692 | protected: | |||
2693 | ValueTypeActionImpl ValueTypeActions; | |||
2694 | ||||
2695 | private: | |||
2696 | LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const; | |||
2697 | ||||
2698 | /// Targets can specify ISD nodes that they would like PerformDAGCombine | |||
2699 | /// callbacks for by calling setTargetDAGCombine(), which sets a bit in this | |||
2700 | /// array. | |||
2701 | unsigned char | |||
2702 | TargetDAGCombineArray[(ISD::BUILTIN_OP_END+CHAR_BIT-1)/CHAR_BIT]; | |||
2703 | ||||
2704 | /// For operations that must be promoted to a specific type, this holds the | |||
2705 | /// destination type. This map should be sparse, so don't hold it as an | |||
2706 | /// array. | |||
2707 | /// | |||
2708 | /// Targets add entries to this map with AddPromotedToType(..), clients access | |||
2709 | /// this with getTypeToPromoteTo(..). | |||
2710 | std::map<std::pair<unsigned, MVT::SimpleValueType>, MVT::SimpleValueType> | |||
2711 | PromoteToType; | |||
2712 | ||||
2713 | /// Stores the name of each libcall. | |||
2714 | const char *LibcallRoutineNames[RTLIB::UNKNOWN_LIBCALL + 1]; | |||
2715 | ||||
2716 | /// The ISD::CondCode that should be used to test the result of each of the | |||
2717 | /// comparison libcall against zero. | |||
2718 | ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL]; | |||
2719 | ||||
2720 | /// Stores the CallingConv that should be used for each libcall. | |||
2721 | CallingConv::ID LibcallCallingConvs[RTLIB::UNKNOWN_LIBCALL]; | |||
2722 | ||||
2723 | /// Set default libcall names and calling conventions. | |||
2724 | void InitLibcalls(const Triple &TT); | |||
2725 | ||||
2726 | protected: | |||
2727 | /// Return true if the extension represented by \p I is free. | |||
2728 | /// \pre \p I is a sign, zero, or fp extension and | |||
2729 | /// is[Z|FP]ExtFree of the related types is not true. | |||
2730 | virtual bool isExtFreeImpl(const Instruction *I) const { return false; } | |||
2731 | ||||
2732 | /// Depth that GatherAllAliases should continue looking for chain | |||
2733 | /// dependencies when trying to find a more preferable chain. As an | |||
2734 | /// approximation, this should be more than the number of consecutive stores | |||
2735 | /// expected to be merged. | |||
2736 | unsigned GatherAllAliasesMaxDepth; | |||
2737 | ||||
2738 | /// Specify maximum number of store instructions per memset call. | |||
2739 | /// | |||
2740 | /// When lowering \@llvm.memset this field specifies the maximum number of | |||
2741 | /// store operations that may be substituted for the call to memset. Targets | |||
2742 | /// must set this value based on the cost threshold for that target. Targets | |||
2743 | /// should assume that the memset will be done using as many of the largest | |||
2744 | /// store operations first, followed by smaller ones, if necessary, per | |||
2745 | /// alignment restrictions. For example, storing 9 bytes on a 32-bit machine | |||
2746 | /// with 16-bit alignment would result in four 2-byte stores and one 1-byte | |||
2747 | /// store. This only applies to setting a constant array of a constant size. | |||
2748 | unsigned MaxStoresPerMemset; | |||
2749 | ||||
2750 | /// Maximum number of stores operations that may be substituted for the call | |||
2751 | /// to memset, used for functions with OptSize attribute. | |||
2752 | unsigned MaxStoresPerMemsetOptSize; | |||
2753 | ||||
2754 | /// Specify maximum bytes of store instructions per memcpy call. | |||
2755 | /// | |||
2756 | /// When lowering \@llvm.memcpy this field specifies the maximum number of | |||
2757 | /// store operations that may be substituted for a call to memcpy. Targets | |||
2758 | /// must set this value based on the cost threshold for that target. Targets | |||
2759 | /// should assume that the memcpy will be done using as many of the largest | |||
2760 | /// store operations first, followed by smaller ones, if necessary, per | |||
2761 | /// alignment restrictions. For example, storing 7 bytes on a 32-bit machine | |||
2762 | /// with 32-bit alignment would result in one 4-byte store, one 2-byte store | |||
2763 | /// and one 1-byte store. This only applies to copying a constant array of | |||
2764 | /// constant size. | |||
2765 | unsigned MaxStoresPerMemcpy; | |||
2766 | ||||
2767 | ||||
2768 | /// \brief Specify max number of store instructions to glue in inlined memcpy. | |||
2769 | /// | |||
2770 | /// When memcpy is inlined based on MaxStoresPerMemcpy, specify maximum number | |||
2771 | /// of store instructions to keep together. This helps in pairing and | |||
2772 | /// vectorization later on. | |||
2773 | unsigned MaxGluedStoresPerMemcpy = 0; | |||
2774 | ||||
2775 | /// Maximum number of store operations that may be substituted for a call to | |||
2776 | /// memcpy, used for functions with OptSize attribute. | |||
2777 | unsigned MaxStoresPerMemcpyOptSize; | |||
2778 | unsigned MaxLoadsPerMemcmp; | |||
2779 | unsigned MaxLoadsPerMemcmpOptSize; | |||
2780 | ||||
2781 | /// Specify maximum bytes of store instructions per memmove call. | |||
2782 | /// | |||
2783 | /// When lowering \@llvm.memmove this field specifies the maximum number of | |||
2784 | /// store instructions that may be substituted for a call to memmove. Targets | |||
2785 | /// must set this value based on the cost threshold for that target. Targets | |||
2786 | /// should assume that the memmove will be done using as many of the largest | |||
2787 | /// store operations first, followed by smaller ones, if necessary, per | |||
2788 | /// alignment restrictions. For example, moving 9 bytes on a 32-bit machine | |||
2789 | /// with 8-bit alignment would result in nine 1-byte stores. This only | |||
2790 | /// applies to copying a constant array of constant size. | |||
2791 | unsigned MaxStoresPerMemmove; | |||
2792 | ||||
2793 | /// Maximum number of store instructions that may be substituted for a call to | |||
2794 | /// memmove, used for functions with OptSize attribute. | |||
2795 | unsigned MaxStoresPerMemmoveOptSize; | |||
2796 | ||||
2797 | /// Tells the code generator that select is more expensive than a branch if | |||
2798 | /// the branch is usually predicted right. | |||
2799 | bool PredictableSelectIsExpensive; | |||
2800 | ||||
2801 | /// \see enableExtLdPromotion. | |||
2802 | bool EnableExtLdPromotion; | |||
2803 | ||||
2804 | /// Return true if the value types that can be represented by the specified | |||
2805 | /// register class are all legal. | |||
2806 | bool isLegalRC(const TargetRegisterInfo &TRI, | |||
2807 | const TargetRegisterClass &RC) const; | |||
2808 | ||||
2809 | /// Replace/modify any TargetFrameIndex operands with a target-dependent | |||
2810 | /// sequence of memory operands that is recognized by PrologEpilogInserter. | |||
2811 | MachineBasicBlock *emitPatchPoint(MachineInstr &MI, | |||
2812 | MachineBasicBlock *MBB) const; | |||
2813 | ||||
2814 | /// Replace/modify the XRay custom event operands with target-dependent | |||
2815 | /// details. | |||
2816 | MachineBasicBlock *emitXRayCustomEvent(MachineInstr &MI, | |||
2817 | MachineBasicBlock *MBB) const; | |||
2818 | ||||
2819 | /// Replace/modify the XRay typed event operands with target-dependent | |||
2820 | /// details. | |||
2821 | MachineBasicBlock *emitXRayTypedEvent(MachineInstr &MI, | |||
2822 | MachineBasicBlock *MBB) const; | |||
2823 | }; | |||
2824 | ||||
2825 | /// This class defines information used to lower LLVM code to legal SelectionDAG | |||
2826 | /// operators that the target instruction selector can accept natively. | |||
2827 | /// | |||
2828 | /// This class also defines callbacks that targets must implement to lower | |||
2829 | /// target-specific constructs to SelectionDAG operators. | |||
2830 | class TargetLowering : public TargetLoweringBase { | |||
2831 | public: | |||
2832 | struct DAGCombinerInfo; | |||
2833 | ||||
2834 | TargetLowering(const TargetLowering &) = delete; | |||
2835 | TargetLowering &operator=(const TargetLowering &) = delete; | |||
2836 | ||||
2837 | /// NOTE: The TargetMachine owns TLOF. | |||
2838 | explicit TargetLowering(const TargetMachine &TM); | |||
2839 | ||||
2840 | bool isPositionIndependent() const; | |||
2841 | ||||
2842 | virtual bool isSDNodeSourceOfDivergence(const SDNode *N, | |||
2843 | FunctionLoweringInfo *FLI, | |||
2844 | LegacyDivergenceAnalysis *DA) const { | |||
2845 | return false; | |||
2846 | } | |||
2847 | ||||
2848 | virtual bool isSDNodeAlwaysUniform(const SDNode * N) const { | |||
2849 | return false; | |||
2850 | } | |||
2851 | ||||
2852 | /// Returns true, and sets the base pointer, the offset, and the addressing | |||
2853 | /// mode by reference, if the node's address can be legally represented as a | |||
2854 | /// pre-indexed load / store address. | |||
2855 | virtual bool getPreIndexedAddressParts(SDNode * /*N*/, SDValue &/*Base*/, | |||
2856 | SDValue &/*Offset*/, | |||
2857 | ISD::MemIndexedMode &/*AM*/, | |||
2858 | SelectionDAG &/*DAG*/) const { | |||
2859 | return false; | |||
2860 | } | |||
2861 | ||||
2862 | /// Returns true, and sets the base pointer, the offset, and the addressing | |||
2863 | /// mode by reference, if this node can be combined with a load / store to | |||
2864 | /// form a post-indexed load / store. | |||
2865 | virtual bool getPostIndexedAddressParts(SDNode * /*N*/, SDNode * /*Op*/, | |||
2866 | SDValue &/*Base*/, | |||
2867 | SDValue &/*Offset*/, | |||
2868 | ISD::MemIndexedMode &/*AM*/, | |||
2869 | SelectionDAG &/*DAG*/) const { | |||
2870 | return false; | |||
2871 | } | |||
2872 | ||||
2873 | /// Return the entry encoding for a jump table in the current function. The | |||
2874 | /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. | |||
2875 | virtual unsigned getJumpTableEncoding() const; | |||
2876 | ||||
2877 | virtual const MCExpr * | |||
2878 | LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/, | |||
2879 | const MachineBasicBlock * /*MBB*/, unsigned /*uid*/, | |||
2880 | MCContext &/*Ctx*/) const { | |||
2881 | llvm_unreachable("Need to implement this hook if target has custom JTIs")::llvm::llvm_unreachable_internal("Need to implement this hook if target has custom JTIs" , "/build/llvm-toolchain-snapshot-9~svn359999/include/llvm/CodeGen/TargetLowering.h" , 2881); | |||
2882 | } | |||
2883 | ||||
2884 | /// Returns relocation base for the given PIC jumptable. | |||
2885 | virtual SDValue getPICJumpTableRelocBase(SDValue Table, | |||
2886 | SelectionDAG &DAG) const; | |||
2887 | ||||
2888 | /// This returns the relocation base for the given PIC jumptable, the same as | |||
2889 | /// getPICJumpTableRelocBase, but as an MCExpr. | |||
2890 | virtual const MCExpr * | |||
2891 | getPICJumpTableRelocBaseExpr(const MachineFunction *MF, | |||
2892 | unsigned JTI, MCContext &Ctx) const; | |||
2893 | ||||
2894 | /// Return true if folding a constant offset with the given GlobalAddress is | |||
2895 | /// legal. It is frequently not legal in PIC relocation models. | |||
2896 | virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; | |||
2897 | ||||
2898 | bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, | |||
2899 | SDValue &Chain) const; | |||
2900 | ||||
2901 | void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, | |||
2902 | SDValue &NewRHS, ISD::CondCode &CCCode, | |||
2903 | const SDLoc &DL) const; | |||
2904 | ||||
2905 | /// Returns a pair of (return value, chain). | |||
2906 | /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC. | |||
2907 | std::pair<SDValue, SDValue> makeLibCall( | |||
2908 | SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef<SDValue> Ops, | |||
2909 | bool isSigned, const SDLoc &dl, bool doesNotReturn = false, | |||
2910 | bool isReturnValueUsed = true, bool isPostTypeLegalization = false) const; | |||
2911 | ||||
2912 | /// Check whether parameters to a call that are passed in callee saved | |||
2913 | /// registers are the same as from the calling function. This needs to be | |||
2914 | /// checked for tail call eligibility. | |||
2915 | bool parametersInCSRMatch(const MachineRegisterInfo &MRI, | |||
2916 | const uint32_t *CallerPreservedMask, | |||
2917 | const SmallVectorImpl<CCValAssign> &ArgLocs, | |||
2918 | const SmallVectorImpl<SDValue> &OutVals) const; | |||
2919 | ||||
2920 | //===--------------------------------------------------------------------===// | |||
2921 | // TargetLowering Optimization Methods | |||
2922 | // | |||
2923 | ||||
2924 | /// A convenience struct that encapsulates a DAG, and two SDValues for | |||
2925 | /// returning information from TargetLowering to its clients that want to | |||
2926 | /// combine. | |||
2927 | struct TargetLoweringOpt { | |||
2928 | SelectionDAG &DAG; | |||
2929 | bool LegalTys; | |||
2930 | bool LegalOps; | |||
2931 | SDValue Old; | |||
2932 | SDValue New; | |||
2933 | ||||
2934 | explicit TargetLoweringOpt(SelectionDAG &InDAG, | |||
2935 | bool LT, bool LO) : | |||
2936 | DAG(InDAG), LegalTys(LT), LegalOps(LO) {} | |||
2937 | ||||
2938 | bool LegalTypes() const { return LegalTys; } | |||
2939 | bool LegalOperations() const { return LegalOps; } | |||
2940 | ||||
2941 | bool CombineTo(SDValue O, SDValue N) { | |||
2942 | Old = O; | |||
2943 | New = N; | |||
2944 | return true; | |||
2945 | } | |||
2946 | }; | |||
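// Usage sketch (illustrative; Op/DemandedBits/Known/N are assumptions from
// a surrounding combine): target combines typically thread a
// TargetLoweringOpt through the SimplifyDemanded* helpers below and commit
// the recorded Old->New replacement through the DAG combiner.
//
//   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
//                         !DCI.isBeforeLegalizeOps());
//   if (TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO)) {
//     DCI.CommitTargetLoweringOpt(TLO); // replaces TLO.Old with TLO.New
//     return SDValue(N, 0);
//   }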
2947 | ||||
2948 | /// Determines the optimal series of memory ops to replace the memset / memcpy. | |||
2949 | /// Return true if the number of memory ops is below the threshold (Limit). | |||
2950 | /// The types of the sequence of memory ops needed to perform the | |||
2951 | /// memset / memcpy are returned by reference in MemOps. | |||
2952 | bool findOptimalMemOpLowering(std::vector<EVT> &MemOps, | |||
2953 | unsigned Limit, uint64_t Size, | |||
2954 | unsigned DstAlign, unsigned SrcAlign, | |||
2955 | bool IsMemset, | |||
2956 | bool ZeroMemset, | |||
2957 | bool MemcpyStrSrc, | |||
2958 | bool AllowOverlap, | |||
2959 | unsigned DstAS, unsigned SrcAS, | |||
2960 | const AttributeList &FuncAttributes) const; | |||
2961 | ||||
2962 | /// Check to see if the specified operand of the specified instruction is a | |||
2963 | /// constant integer. If so, check to see if there are any bits set in the | |||
2964 | /// constant that are not demanded. If so, shrink the constant and return | |||
2965 | /// true. | |||
2966 | bool ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, | |||
2967 | TargetLoweringOpt &TLO) const; | |||
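// Example of the shrink (illustrative): if only the low 8 bits of an AND
// are demanded, the constant loses its undemanded set bits:
//   (and x, 0x00FF00FF), Demanded = 0xFF  -->  (and x, 0xFF)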
2968 | ||||
2969 | // Target hook to do target-specific constant optimization, which is called by | |||
2970 | // ShrinkDemandedConstant. This function should return true if the target | |||
2971 | // doesn't want ShrinkDemandedConstant to further optimize the constant. | |||
2972 | virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded, | |||
2973 | TargetLoweringOpt &TLO) const { | |||
2974 | return false; | |||
2975 | } | |||
2976 | ||||
2977 | /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. This | |||
2978 | /// uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be | |||
2979 | /// generalized for targets with other types of implicit widening casts. | |||
2980 | bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded, | |||
2981 | TargetLoweringOpt &TLO) const; | |||
2982 | ||||
2983 | /// Look at Op. At this point, we know that only the DemandedBits bits of the | |||
2984 | /// result of Op are ever used downstream. If we can use this information to | |||
2985 | /// simplify Op, create a new simplified DAG node and return true, returning | |||
2986 | /// the original and new nodes in Old and New. Otherwise, analyze the | |||
2987 | /// expression and return a mask of KnownOne and KnownZero bits for the | |||
2988 | /// expression (used to simplify the caller). The KnownZero/One bits may only | |||
2989 | /// be accurate for those bits in the Demanded masks. | |||
2990 | /// \p AssumeSingleUse When this parameter is true, this function will | |||
2991 | /// attempt to simplify \p Op even if there are multiple uses. | |||
2992 | /// Callers are responsible for correctly updating the DAG based on the | |||
2993 | /// results of this function, because simply replacing TLO.Old | |||
2994 | /// with TLO.New will be incorrect when this parameter is true and TLO.Old | |||
2995 | /// has multiple uses. | |||
2996 | bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, | |||
2997 | const APInt &DemandedElts, KnownBits &Known, | |||
2998 | TargetLoweringOpt &TLO, unsigned Depth = 0, | |||
2999 | bool AssumeSingleUse = false) const; | |||
3000 | ||||
3001 | /// Helper wrapper around SimplifyDemandedBits, demanding all elements. | |||
3002 | /// Adds Op back to the worklist upon success. | |||
3003 | bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, | |||
3004 | KnownBits &Known, TargetLoweringOpt &TLO, | |||
3005 | unsigned Depth = 0, | |||
3006 | bool AssumeSingleUse = false) const; | |||
3007 | ||||
3008 | /// Helper wrapper around SimplifyDemandedBits. | |||
3009 | /// Adds Op back to the worklist upon success. | |||
3010 | bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, | |||
3011 | DAGCombinerInfo &DCI) const; | |||
3012 | ||||
3013 | /// Look at Vector Op. At this point, we know that only the DemandedElts | |||
3014 | /// elements of the result of Op are ever used downstream. If we can use | |||
3015 | /// this information to simplify Op, create a new simplified DAG node and | |||
3016 | /// return true, storing the original and new nodes in TLO. | |||
3017 | /// Otherwise, analyze the expression and return a mask of KnownUndef and | |||
3018 | /// KnownZero elements for the expression (used to simplify the caller). | |||
3019 | /// The KnownUndef/Zero elements may only be accurate for those bits | |||
3020 | /// in the DemandedMask. | |||
3021 | /// \p AssumeSingleUse When this parameter is true, this function will | |||
3022 | /// attempt to simplify \p Op even if there are multiple uses. | |||
3023 | /// Callers are responsible for correctly updating the DAG based on the | |||
3024 | /// results of this function, because simply replacing TLO.Old | |||
3025 | /// with TLO.New will be incorrect when this parameter is true and TLO.Old | |||
3026 | /// has multiple uses. | |||
3027 | bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, | |||
3028 | APInt &KnownUndef, APInt &KnownZero, | |||
3029 | TargetLoweringOpt &TLO, unsigned Depth = 0, | |||
3030 | bool AssumeSingleUse = false) const; | |||
3031 | ||||
3032 | /// Helper wrapper around SimplifyDemandedVectorElts. | |||
3033 | /// Adds Op back to the worklist upon success. | |||
3034 | bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts, | |||
3035 | APInt &KnownUndef, APInt &KnownZero, | |||
3036 | DAGCombinerInfo &DCI) const; | |||
3037 | ||||
3038 | /// Determine which of the bits specified in Mask are known to be either zero | |||
3039 | /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts | |||
3040 | /// argument allows us to only collect the known bits that are shared by the | |||
3041 | /// requested vector elements. | |||
3042 | virtual void computeKnownBitsForTargetNode(const SDValue Op, | |||
3043 | KnownBits &Known, | |||
3044 | const APInt &DemandedElts, | |||
3045 | const SelectionDAG &DAG, | |||
3046 | unsigned Depth = 0) const; | |||
3047 | ||||
3048 | /// Determine which of the bits of FrameIndex \p FIOp are known to be 0. | |||
3049 | /// Default implementation computes low bits based on alignment | |||
3050 | /// information. This should preserve known bits passed into it. | |||
3051 | virtual void computeKnownBitsForFrameIndex(const SDValue FIOp, | |||
3052 | KnownBits &Known, | |||
3053 | const APInt &DemandedElts, | |||
3054 | const SelectionDAG &DAG, | |||
3055 | unsigned Depth = 0) const; | |||
3056 | ||||
3057 | /// This method can be implemented by targets that want to expose additional | |||
3058 | /// information about sign bits to the DAG Combiner. The DemandedElts | |||
3059 | /// argument allows us to only collect the minimum sign bits that are shared | |||
3060 | /// by the requested vector elements. | |||
3061 | virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, | |||
3062 | const APInt &DemandedElts, | |||
3063 | const SelectionDAG &DAG, | |||
3064 | unsigned Depth = 0) const; | |||
3065 | ||||
3066 | /// Attempt to simplify any target nodes based on the demanded vector | |||
3067 | /// elements, returning true on success. Otherwise, analyze the expression and | |||
3068 | /// return a mask of KnownUndef and KnownZero elements for the expression | |||
3069 | /// (used to simplify the caller). The KnownUndef/Zero elements may only be | |||
3070 | /// accurate for those bits in the DemandedMask. | |||
3071 | virtual bool SimplifyDemandedVectorEltsForTargetNode( | |||
3072 | SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, | |||
3073 | APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const; | |||
3074 | ||||
3075 | /// Attempt to simplify any target nodes based on the demanded bits/elts, | |||
3076 | /// returning true on success. Otherwise, analyze the | |||
3077 | /// expression and return a mask of KnownOne and KnownZero bits for the | |||
3078 | /// expression (used to simplify the caller). The KnownZero/One bits may only | |||
3079 | /// be accurate for those bits in the Demanded masks. | |||
3080 | virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, | |||
3081 | const APInt &DemandedBits, | |||
3082 | const APInt &DemandedElts, | |||
3083 | KnownBits &Known, | |||
3084 | TargetLoweringOpt &TLO, | |||
3085 | unsigned Depth = 0) const; | |||
3086 | ||||
3087 | /// If \p SNaN is false, \returns true if \p Op is known to never be any | |||
3088 | /// NaN. If \p SNaN is true, returns true if \p Op is known to never be a signaling | |||
3089 | /// NaN. | |||
3090 | virtual bool isKnownNeverNaNForTargetNode(SDValue Op, | |||
3091 | const SelectionDAG &DAG, | |||
3092 | bool SNaN = false, | |||
3093 | unsigned Depth = 0) const; | |||
3094 | struct DAGCombinerInfo { | |||
3095 | void *DC; // The DAG Combiner object. | |||
3096 | CombineLevel Level; | |||
3097 | bool CalledByLegalizer; | |||
3098 | ||||
3099 | public: | |||
3100 | SelectionDAG &DAG; | |||
3101 | ||||
3102 | DAGCombinerInfo(SelectionDAG &dag, CombineLevel level, bool cl, void *dc) | |||
3103 | : DC(dc), Level(level), CalledByLegalizer(cl), DAG(dag) {} | |||
3104 | ||||
3105 | bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; } | |||
3106 | bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; } | |||
3107 | bool isAfterLegalizeDAG() const { | |||
3108 | return Level == AfterLegalizeDAG; | |||
3109 | } | |||
3110 | CombineLevel getDAGCombineLevel() { return Level; } | |||
3111 | bool isCalledByLegalizer() const { return CalledByLegalizer; } | |||
3112 | ||||
3113 | void AddToWorklist(SDNode *N); | |||
3114 | SDValue CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo = true); | |||
3115 | SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true); | |||
3116 | SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true); | |||
3117 | ||||
3118 | void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO); | |||
3119 | }; | |||
3120 | ||||
3121 | /// Return if the N is a constant or constant vector equal to the true value | |||
3122 | /// from getBooleanContents(). | |||
3123 | bool isConstTrueVal(const SDNode *N) const; | |||
3124 | ||||
3125 | /// Return if the N is a constant or constant vector equal to the false value | |||
3126 | /// from getBooleanContents(). | |||
3127 | bool isConstFalseVal(const SDNode *N) const; | |||
3128 | ||||
3129 | /// Return if \p N is a True value when extended to \p VT. | |||
3130 | bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const; | |||
3131 | ||||
3132 | /// Try to simplify a setcc built with the specified operands and cc. If it is | |||
3133 | /// unable to simplify it, return a null SDValue. | |||
3134 | SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, | |||
3135 | bool foldBooleans, DAGCombinerInfo &DCI, | |||
3136 | const SDLoc &dl) const; | |||
3137 | ||||
3138 | // For targets which wrap address, unwrap for analysis. | |||
3139 | virtual SDValue unwrapAddress(SDValue N) const { return N; } | |||
3140 | ||||
3141 | /// Returns true (and the GlobalValue and the offset) if the node is a | |||
3142 | /// GlobalAddress + offset. | |||
3143 | virtual bool | |||
3144 | isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const; | |||
3145 | ||||
3146 | /// This method will be invoked for all target nodes and for any | |||
3147 | /// target-independent nodes that the target has registered to be invoked | |||
3148 | /// for. | |||
3149 | /// | |||
3150 | /// The semantics are as follows: | |||
3151 | /// Return Value: | |||
3152 | /// SDValue.Val == 0 - No change was made | |||
3153 | /// SDValue.Val == N - N was replaced, is dead, and is already handled. | |||
3154 | /// otherwise - N should be replaced by the returned Operand. | |||
3155 | /// | |||
3156 | /// In addition, methods provided by DAGCombinerInfo may be used to perform | |||
3157 | /// more complex transformations. | |||
3158 | /// | |||
3159 | virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; | |||
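// Sketch of a target combine hook (illustrative; "Foo" is a hypothetical
// target, and the ADD fold stands in for a real target-specific one):
//
//   SDValue FooTargetLowering::PerformDAGCombine(SDNode *N,
//                                                DAGCombinerInfo &DCI) const {
//     switch (N->getOpcode()) {
//     case ISD::ADD:
//       // Fold (add x, 0) -> x; per the semantics above, returning an
//       // existing operand means "replace N with this value".
//       if (isNullConstant(N->getOperand(1)))
//         return N->getOperand(0);
//       break;
//     }
//     return SDValue(); // no change was made
//   }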
3160 | ||||
3161 | /// Return true if it is profitable to move this shift by a constant amount | |||
3162 | /// through its operand, adjusting any immediate operands as necessary to | |||
3163 | /// preserve semantics. This transformation may not be desirable if it | |||
3164 | /// disrupts a particularly auspicious target-specific tree (e.g. bitfield | |||
3165 | /// extraction in AArch64). By default, it returns true. | |||
3166 | /// | |||
3167 | /// @param N the shift node | |||
3168 | /// @param Level the current DAGCombine legalization level. | |||
3169 | virtual bool isDesirableToCommuteWithShift(const SDNode *N, | |||
3170 | CombineLevel Level) const { | |||
3171 | return true; | |||
3172 | } | |||
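// Illustrative override (hypothetical target): decline the commute when the
// shifted operand feeds a tree the target matches directly, e.g. an AND
// that instruction selection would turn into a bitfield extract.
//
//   bool FooTargetLowering::isDesirableToCommuteWithShift(
//       const SDNode *N, CombineLevel Level) const {
//     // Keep the shift above the AND so the UBFX-style pattern survives.
//     return N->getOperand(0).getOpcode() != ISD::AND;
//   }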
3173 | ||||
3174 | // Return true if it is profitable to combine a BUILD_VECTOR with a stride-pattern | |||
3175 | // to a shuffle and a truncate. | |||
3176 | // Example of such a combine: | |||
3177 | // v4i32 build_vector((extract_elt V, 1), | |||
3178 | // (extract_elt V, 3), | |||
3179 | // (extract_elt V, 5), | |||
3180 | // (extract_elt V, 7)) | |||
3181 | // --> | |||
3182 | // v4i32 truncate (bitcast (shuffle<1,u,3,u,5,u,7,u> V, u) to v4i64) | |||
3183 | virtual bool isDesirableToCombineBuildVectorToShuffleTruncate( | |||
3184 | ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const { | |||
3185 | return false; | |||
3186 | } | |||
3187 | ||||
3188 | /// Return true if the target has native support for the specified value type | |||
3189 | /// and it is 'desirable' to use the type for the given node type. e.g. On x86 | |||
3190 | /// i16 is legal, but undesirable since i16 instruction encodings are longer | |||
3191 | /// and some i16 instructions are slow. | |||
3192 | virtual bool isTypeDesirableForOp(unsigned /*Opc*/, EVT VT) const { | |||
3193 | // By default, assume all legal types are desirable. | |||
3194 | return isTypeLegal(VT); | |||
3195 | } | |||
3196 | ||||
3197 | /// Return true if it is profitable for dag combiner to transform a floating | |||
3198 | /// point op of the specified opcode to an equivalent op of an integer | |||
3199 | /// type. e.g. f32 load -> i32 load can be profitable on ARM. | |||
3200 | virtual bool isDesirableToTransformToIntegerOp(unsigned /*Opc*/, | |||
3201 | EVT /*VT*/) const { | |||
3202 | return false; | |||
3203 | } | |||
3204 | ||||
3205 | /// This method queries the target whether it is beneficial for the dag combiner to | |||
3206 | /// promote the specified node. If true, it should return the desired | |||
3207 | /// promotion type by reference. | |||
3208 | virtual bool IsDesirableToPromoteOp(SDValue /*Op*/, EVT &/*PVT*/) const { | |||
3209 | return false; | |||
3210 | } | |||
3211 | ||||
3212 | /// Return true if the target supports swifterror attribute. It optimizes | |||
3213 | /// loads and stores to reading and writing a specific register. | |||
3214 | virtual bool supportSwiftError() const { | |||
3215 | return false; | |||
3216 | } | |||
3217 | ||||
3218 | /// Return true if the target supports handling a subset of CSRs for the | |||
3219 | /// given machine function explicitly via copies. | |||
3220 | virtual bool supportSplitCSR(MachineFunction *MF) const { | |||
3221 | return false; | |||
3222 | } | |||
3223 | ||||
3224 | /// Perform necessary initialization to handle a subset of CSRs explicitly | |||
3225 | /// via copies. This function is called at the beginning of instruction | |||
3226 | /// selection. | |||
3227 | virtual void initializeSplitCSR(MachineBasicBlock *Entry) const { | |||
3228 | llvm_unreachable("Not Implemented"); | |||
3229 | } | |||
3230 | ||||
3231 | /// Insert explicit copies in entry and exit blocks. We copy a subset of | |||
3232 | /// CSRs to virtual registers in the entry block, and copy them back to | |||
3233 | /// physical registers in the exit blocks. This function is called at the end | |||
3234 | /// of instruction selection. | |||
3235 | virtual void insertCopiesSplitCSR( | |||
3236 | MachineBasicBlock *Entry, | |||
3237 | const SmallVectorImpl<MachineBasicBlock *> &Exits) const { | |||
3238 | llvm_unreachable("Not Implemented"); | |||
3239 | } | |||
3240 | ||||
3241 | //===--------------------------------------------------------------------===// | |||
3242 | // Lowering methods - These methods must be implemented by targets so that | |||
3243 | // the SelectionDAGBuilder code knows how to lower these. | |||
3244 | // | |||
3245 | ||||
3246 | /// This hook must be implemented to lower the incoming (formal) arguments, | |||
3247 | /// described by the Ins array, into the specified DAG. The implementation | |||
3248 | /// should fill in the InVals array with legal-type argument values, and | |||
3249 | /// return the resulting token chain value. | |||
3250 | virtual SDValue LowerFormalArguments( | |||
3251 | SDValue /*Chain*/, CallingConv::ID /*CallConv*/, bool /*isVarArg*/, | |||
3252 | const SmallVectorImpl<ISD::InputArg> & /*Ins*/, const SDLoc & /*dl*/, | |||
3253 | SelectionDAG & /*DAG*/, SmallVectorImpl<SDValue> & /*InVals*/) const { | |||
3254 | llvm_unreachable("Not Implemented"); | |||
3255 | } | |||
3256 | ||||
3257 | /// This structure contains all information that is necessary for lowering | |||
3258 | /// calls. It is passed to TLI::LowerCallTo when the SelectionDAG builder | |||
3259 | /// needs to lower a call, and targets will see this struct in their LowerCall | |||
3260 | /// implementation. | |||
3261 | struct CallLoweringInfo { | |||
3262 | SDValue Chain; | |||
3263 | Type *RetTy = nullptr; | |||
3264 | bool RetSExt : 1; | |||
3265 | bool RetZExt : 1; | |||
3266 | bool IsVarArg : 1; | |||
3267 | bool IsInReg : 1; | |||
3268 | bool DoesNotReturn : 1; | |||
3269 | bool IsReturnValueUsed : 1; | |||
3270 | bool IsConvergent : 1; | |||
3271 | bool IsPatchPoint : 1; | |||
3272 | ||||
3273 | // IsTailCall should be modified by implementations of | |||
3274 | // TargetLowering::LowerCall that perform tail call conversions. | |||
3275 | bool IsTailCall = false; | |||
3276 | ||||
3277 | // Is Call lowering done post SelectionDAG type legalization. | |||
3278 | bool IsPostTypeLegalization = false; | |||
3279 | ||||
3280 | unsigned NumFixedArgs = -1; | |||
3281 | CallingConv::ID CallConv = CallingConv::C; | |||
3282 | SDValue Callee; | |||
3283 | ArgListTy Args; | |||
3284 | SelectionDAG &DAG; | |||
3285 | SDLoc DL; | |||
3286 | ImmutableCallSite CS; | |||
3287 | SmallVector<ISD::OutputArg, 32> Outs; | |||
3288 | SmallVector<SDValue, 32> OutVals; | |||
3289 | SmallVector<ISD::InputArg, 32> Ins; | |||
3290 | SmallVector<SDValue, 4> InVals; | |||
3291 | ||||
3292 | CallLoweringInfo(SelectionDAG &DAG) | |||
3293 | : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false), | |||
3294 | DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false), | |||
3295 | IsPatchPoint(false), DAG(DAG) {} | |||
3296 | ||||
3297 | CallLoweringInfo &setDebugLoc(const SDLoc &dl) { | |||
3298 | DL = dl; | |||
3299 | return *this; | |||
3300 | } | |||
3301 | ||||
3302 | CallLoweringInfo &setChain(SDValue InChain) { | |||
3303 | Chain = InChain; | |||
3304 | return *this; | |||
3305 | } | |||
3306 | ||||
3307 | // setCallee with target/module-specific attributes | |||
3308 | CallLoweringInfo &setLibCallee(CallingConv::ID CC, Type *ResultType, | |||
3309 | SDValue Target, ArgListTy &&ArgsList) { | |||
3310 | RetTy = ResultType; | |||
3311 | Callee = Target; | |||
3312 | CallConv = CC; | |||
3313 | NumFixedArgs = ArgsList.size(); | |||
3314 | Args = std::move(ArgsList); | |||
3315 | ||||
3316 | DAG.getTargetLoweringInfo().markLibCallAttributes( | |||
3317 | &(DAG.getMachineFunction()), CC, Args); | |||
3318 | return *this; | |||
3319 | } | |||
3320 | ||||
3321 | CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultType, | |||
3322 | SDValue Target, ArgListTy &&ArgsList) { | |||
3323 | RetTy = ResultType; | |||
3324 | Callee = Target; | |||
3325 | CallConv = CC; | |||
3326 | NumFixedArgs = ArgsList.size(); | |||
3327 | Args = std::move(ArgsList); | |||
3328 | return *this; | |||
3329 | } | |||
3330 | ||||
3331 | CallLoweringInfo &setCallee(Type *ResultType, FunctionType *FTy, | |||
3332 | SDValue Target, ArgListTy &&ArgsList, | |||
3333 | ImmutableCallSite Call) { | |||
3334 | RetTy = ResultType; | |||
3335 | ||||
3336 | IsInReg = Call.hasRetAttr(Attribute::InReg); | |||
3337 | DoesNotReturn = | |||
3338 | Call.doesNotReturn() || | |||
3339 | (!Call.isInvoke() && | |||
3340 | isa<UnreachableInst>(Call.getInstruction()->getNextNode())); | |||
3341 | IsVarArg = FTy->isVarArg(); | |||
3342 | IsReturnValueUsed = !Call.getInstruction()->use_empty(); | |||
3343 | RetSExt = Call.hasRetAttr(Attribute::SExt); | |||
3344 | RetZExt = Call.hasRetAttr(Attribute::ZExt); | |||
3345 | ||||
3346 | Callee = Target; | |||
3347 | ||||
3348 | CallConv = Call.getCallingConv(); | |||
3349 | NumFixedArgs = FTy->getNumParams(); | |||
3350 | Args = std::move(ArgsList); | |||
3351 | ||||
3352 | CS = Call; | |||
3353 | ||||
3354 | return *this; | |||
3355 | } | |||
3356 | ||||
3357 | CallLoweringInfo &setInRegister(bool Value = true) { | |||
3358 | IsInReg = Value; | |||
3359 | return *this; | |||
3360 | } | |||
3361 | ||||
3362 | CallLoweringInfo &setNoReturn(bool Value = true) { | |||
3363 | DoesNotReturn = Value; | |||
3364 | return *this; | |||
3365 | } | |||
3366 | ||||
3367 | CallLoweringInfo &setVarArg(bool Value = true) { | |||
3368 | IsVarArg = Value; | |||
3369 | return *this; | |||
3370 | } | |||
3371 | ||||
3372 | CallLoweringInfo &setTailCall(bool Value = true) { | |||
3373 | IsTailCall = Value; | |||
3374 | return *this; | |||
3375 | } | |||
3376 | ||||
3377 | CallLoweringInfo &setDiscardResult(bool Value = true) { | |||
3378 | IsReturnValueUsed = !Value; | |||
3379 | return *this; | |||
3380 | } | |||
3381 | ||||
3382 | CallLoweringInfo &setConvergent(bool Value = true) { | |||
3383 | IsConvergent = Value; | |||
3384 | return *this; | |||
3385 | } | |||
3386 | ||||
3387 | CallLoweringInfo &setSExtResult(bool Value = true) { | |||
3388 | RetSExt = Value; | |||
3389 | return *this; | |||
3390 | } | |||
3391 | ||||
3392 | CallLoweringInfo &setZExtResult(bool Value = true) { | |||
3393 | RetZExt = Value; | |||
3394 | return *this; | |||
3395 | } | |||
3396 | ||||
3397 | CallLoweringInfo &setIsPatchPoint(bool Value = true) { | |||
3398 | IsPatchPoint = Value; | |||
3399 | return *this; | |||
3400 | } | |||
3401 | ||||
3402 | CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) { | |||
3403 | IsPostTypeLegalization = Value; | |||
3404 | return *this; | |||
3405 | } | |||
3406 | ||||
3407 | ArgListTy &getArgs() { | |||
3408 | return Args; | |||
3409 | } | |||
3410 | }; | |||
3411 | ||||
3412 | /// This function lowers an abstract call to a function into an actual call. | |||
3413 | /// This returns a pair of operands. The first element is the return value | |||
3414 | /// for the function (if RetTy is not VoidTy). The second element is the | |||
3415 | /// outgoing token chain. It calls LowerCall to do the actual lowering. | |||
3416 | std::pair<SDValue, SDValue> LowerCallTo(CallLoweringInfo &CLI) const; | |||
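// Typical construction (illustrative; dl/Chain/RetTy/Callee/Args are
// assumptions from a surrounding lowering): callers assemble a
// CallLoweringInfo with the chained setters above and hand it to
// LowerCallTo.
//
//   TargetLowering::CallLoweringInfo CLI(DAG);
//   CLI.setDebugLoc(dl)
//       .setChain(Chain)
//       .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
//       .setDiscardResult(!IsReturnValueUsed);
//   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
//   // CallResult.first: return value; CallResult.second: token chain.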
3417 | ||||
3418 | /// This hook must be implemented to lower calls into the specified | |||
3419 | /// DAG. The outgoing arguments to the call are described by the Outs array, | |||
3420 | /// and the values to be returned by the call are described by the Ins | |||
3421 | /// array. The implementation should fill in the InVals array with legal-type | |||
3422 | /// return values from the call, and return the resulting token chain value. | |||
3423 | virtual SDValue | |||
3424 | LowerCall(CallLoweringInfo &/*CLI*/, | |||
3425 | SmallVectorImpl<SDValue> &/*InVals*/) const { | |||
3426 | llvm_unreachable("Not Implemented"); | |||
3427 | } | |||
3428 | ||||
3429 | /// Target-specific cleanup for formal ByVal parameters. | |||
3430 | virtual void HandleByVal(CCState *, unsigned &, unsigned) const {} | |||
3431 | ||||
3432 | /// This hook should be implemented to check whether the return values | |||
3433 | /// described by the Outs array can fit into the return registers. If false | |||
3434 | /// is returned, an sret-demotion is performed. | |||
3435 | virtual bool CanLowerReturn(CallingConv::ID /*CallConv*/, | |||
3436 | MachineFunction &/*MF*/, bool /*isVarArg*/, | |||
3437 | const SmallVectorImpl<ISD::OutputArg> &/*Outs*/, | |||
3438 | LLVMContext &/*Context*/) const | |||
3439 | { | |||
3440 | // Return true by default to get preexisting behavior. | |||
3441 | return true; | |||
3442 | } | |||
3443 | ||||
3444 | /// This hook must be implemented to lower outgoing return values, described | |||
3445 | /// by the Outs array, into the specified DAG. The implementation should | |||
3446 | /// return the resulting token chain value. | |||
3447 | virtual SDValue LowerReturn(SDValue /*Chain*/, CallingConv::ID /*CallConv*/, | |||
3448 | bool /*isVarArg*/, | |||
3449 | const SmallVectorImpl<ISD::OutputArg> & /*Outs*/, | |||
3450 | const SmallVectorImpl<SDValue> & /*OutVals*/, | |||
3451 | const SDLoc & /*dl*/, | |||
3452 | SelectionDAG & /*DAG*/) const { | |||
3453 | llvm_unreachable("Not Implemented"); | |||
3454 | } | |||
3455 | ||||
3456 | /// Return true if the result of the specified node is used by a return node | |||
3457 | /// only. It also computes and returns the input chain for the tail call. | |||
3458 | /// | |||
3459 | /// This is used to determine whether it is possible to codegen a libcall as | |||
3460 | /// tail call at legalization time. | |||
3461 | virtual bool isUsedByReturnOnly(SDNode *, SDValue &/*Chain*/) const { | |||
3462 | return false; | |||
3463 | } | |||
3464 | ||||
3465 | /// Return true if the target may be able to emit the call instruction as a tail | |||
3466 | /// call. This is used by optimization passes to determine if it's profitable | |||
3467 | /// to duplicate return instructions to enable tailcall optimization. | |||
3468 | virtual bool mayBeEmittedAsTailCall(const CallInst *) const { | |||
3469 | return false; | |||
3470 | } | |||
3471 | ||||
3472 | /// Return the builtin name for the __builtin___clear_cache intrinsic. | |||
3473 | /// Default is to invoke the clear cache library call. | |||
3474 | virtual const char * getClearCacheBuiltinName() const { | |||
3475 | return "__clear_cache"; | |||
3476 | } | |||
3477 | ||||
3478 | /// Return the register ID of the name passed in. Used by named register | |||
3479 | /// global variables extension. There is no target-independent behaviour | |||
3480 | /// so the default action is to bail. | |||
3481 | virtual unsigned getRegisterByName(const char* RegName, EVT VT, | |||
3482 | SelectionDAG &DAG) const { | |||
3483 | report_fatal_error("Named registers not implemented for this target"); | |||
3484 | } | |||
3485 | ||||
3486 | /// Return the type that should be used to zero or sign extend a | |||
3487 | /// zeroext/signext integer return value. FIXME: Some C calling conventions | |||
3488 | /// require the return type to be promoted, but this is not true all the time, | |||
3489 | /// e.g. i1/i8/i16 on x86/x86_64. It is also not necessary for non-C calling | |||
3490 | /// conventions. The frontend should handle this and include all of the | |||
3491 | /// necessary information. | |||
3492 | virtual EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, | |||
3493 | ISD::NodeType /*ExtendKind*/) const { | |||
3494 | EVT MinVT = getRegisterType(Context, MVT::i32); | |||
3495 | return VT.bitsLT(MinVT) ? MinVT : VT; | |||
3496 | } | |||
3497 | ||||
3498 | /// For some targets, an LLVM struct type must be broken down into multiple | |||
3499 | /// simple types, but the calling convention specifies that the entire struct | |||
3500 | /// must be passed in a block of consecutive registers. | |||
3501 | virtual bool | |||
3502 | functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, | |||
3503 | bool isVarArg) const { | |||
3504 | return false; | |||
3505 | } | |||
3506 | ||||
3507 | /// Returns a 0 terminated array of registers that can be safely used as | |||
3508 | /// scratch registers. | |||
3509 | virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const { | |||
3510 | return nullptr; | |||
3511 | } | |||
3512 | ||||
3513 | /// This callback is used to prepare for a volatile or atomic load. | |||
3514 | /// It takes a chain node as input and returns the chain for the load itself. | |||
3515 | /// | |||
3516 | /// Having a callback like this is necessary for targets like SystemZ, | |||
3517 | /// which allows a CPU to reuse the result of a previous load indefinitely, | |||
3518 | /// even if a cache-coherent store is performed by another CPU. The default | |||
3519 | /// implementation does nothing. | |||
3520 | virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL, | |||
3521 | SelectionDAG &DAG) const { | |||
3522 | return Chain; | |||
3523 | } | |||
3524 | ||||
3525 | /// This callback is used to inspect load/store instructions and add | |||
3526 | /// target-specific MachineMemOperand flags to them. The default | |||
3527 | /// implementation does nothing. | |||
3528 | virtual MachineMemOperand::Flags getMMOFlags(const Instruction &I) const { | |||
3529 | return MachineMemOperand::MONone; | |||
3530 | } | |||
3531 | ||||
3532 | /// This callback is invoked by the type legalizer to legalize nodes with an | |||
3533 | /// illegal operand type but legal result types. It replaces the | |||
3534 | /// LowerOperation callback in the type Legalizer. The reason we cannot do | |||
3535 | /// away with LowerOperation entirely is that LegalizeDAG isn't yet ready to | |||
3536 | /// use this callback. | |||
3537 | /// | |||
3538 | /// TODO: Consider merging with ReplaceNodeResults. | |||
3539 | /// | |||
3540 | /// The target places new result values for the node in Results (their number | |||
3541 | /// and types must exactly match those of the original return values of | |||
3542 | /// the node), or leaves Results empty, which indicates that the node is not | |||
3543 | /// to be custom lowered after all. | |||
3544 | /// The default implementation calls LowerOperation. | |||
3545 | virtual void LowerOperationWrapper(SDNode *N, | |||
3546 | SmallVectorImpl<SDValue> &Results, | |||
3547 | SelectionDAG &DAG) const; | |||
3548 | ||||
3549 | /// This callback is invoked for operations that are unsupported by the | |||
3550 | /// target, which are registered to use 'custom' lowering, and whose defined | |||
3551 | /// values are all legal. If the target has no operations that require custom | |||
3552 | /// lowering, it need not implement this. The default implementation of this | |||
3553 | /// aborts. | |||
3554 | virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; | |||
3555 | ||||
3556 | /// This callback is invoked when a node result type is illegal for the | |||
3557 | /// target, and the operation was registered to use 'custom' lowering for that | |||
3558 | /// result type. The target places new result values for the node in Results | |||
3559 | /// (their number and types must exactly match those of the original return | |||
3560 | /// values of the node), or leaves Results empty, which indicates that the | |||
3561 | /// node is not to be custom lowered after all. | |||
3562 | /// | |||
3563 | /// If the target has no operations that require custom lowering, it need not | |||
3564 | /// implement this. The default implementation aborts. | |||
3565 | virtual void ReplaceNodeResults(SDNode * /*N*/, | |||
3566 | SmallVectorImpl<SDValue> &/*Results*/, | |||
3567 | SelectionDAG &/*DAG*/) const { | |||
3568 | llvm_unreachable("ReplaceNodeResults not implemented for this target!")::llvm::llvm_unreachable_internal("ReplaceNodeResults not implemented for this target!" , "/build/llvm-toolchain-snapshot-9~svn359999/include/llvm/CodeGen/TargetLowering.h" , 3568); | |||
3569 | } | |||
3570 | ||||
3571 | /// This method returns the name of a target specific DAG node. | |||
3572 | virtual const char *getTargetNodeName(unsigned Opcode) const; | |||
3573 | ||||
3574 | /// This method returns a target specific FastISel object, or null if the | |||
3575 | /// target does not support "fast" ISel. | |||
3576 | virtual FastISel *createFastISel(FunctionLoweringInfo &, | |||
3577 | const TargetLibraryInfo *) const { | |||
3578 | return nullptr; | |||
3579 | } | |||
3580 | ||||
3581 | bool verifyReturnAddressArgumentIsConstant(SDValue Op, | |||
3582 | SelectionDAG &DAG) const; | |||
3583 | ||||
3584 | //===--------------------------------------------------------------------===// | |||
3585 | // Inline Asm Support hooks | |||
3586 | // | |||
3587 | ||||
3588 | /// This hook allows the target to expand an inline asm call to be explicit | |||
3589 | /// llvm code if it wants to. This is useful for turning simple inline asms | |||
3590 | /// into LLVM intrinsics, which gives the compiler more information about the | |||
3591 | /// behavior of the code. | |||
3592 | virtual bool ExpandInlineAsm(CallInst *) const { | |||
3593 | return false; | |||
3594 | } | |||
3595 | ||||
3596 | enum ConstraintType { | |||
3597 | C_Register, // Constraint represents specific register(s). | |||
3598 | C_RegisterClass, // Constraint represents any of register(s) in class. | |||
3599 | C_Memory, // Memory constraint. | |||
3600 | C_Other, // Something else. | |||
3601 | C_Unknown // Unsupported constraint. | |||
3602 | }; | |||
3603 | ||||
3604 | enum ConstraintWeight { | |||
3605 | // Generic weights. | |||
3606 | CW_Invalid = -1, // No match. | |||
3607 | CW_Okay = 0, // Acceptable. | |||
3608 | CW_Good = 1, // Good weight. | |||
3609 | CW_Better = 2, // Better weight. | |||
3610 | CW_Best = 3, // Best weight. | |||
3611 | ||||
3612 | // Well-known weights. | |||
3613 | CW_SpecificReg = CW_Okay, // Specific register operands. | |||
3614 | CW_Register = CW_Good, // Register operands. | |||
3615 | CW_Memory = CW_Better, // Memory operands. | |||
3616 | CW_Constant = CW_Best, // Constant operand. | |||
3617 | CW_Default = CW_Okay // Default or don't know type. | |||
3618 | }; | |||
3619 | ||||
3620 | /// This contains information for each constraint that we are lowering. | |||
3621 | struct AsmOperandInfo : public InlineAsm::ConstraintInfo { | |||
3622 | /// This contains the actual string for the code, like "m". TargetLowering | |||
3623 | /// picks the 'best' code from ConstraintInfo::Codes that most closely | |||
3624 | /// matches the operand. | |||
3625 | std::string ConstraintCode; | |||
3626 | ||||
3627 | /// Information about the constraint code, e.g. Register, RegisterClass, | |||
3628 | /// Memory, Other, Unknown. | |||
3629 | TargetLowering::ConstraintType ConstraintType = TargetLowering::C_Unknown; | |||
3630 | ||||
3631 | /// If this is the result output operand or a clobber, this is null, | |||
3632 | /// otherwise it is the incoming operand to the CallInst. This gets | |||
3633 | /// modified as the asm is processed. | |||
3634 | Value *CallOperandVal = nullptr; | |||
3635 | ||||
3636 | /// The ValueType for the operand value. | |||
3637 | MVT ConstraintVT = MVT::Other; | |||
3638 | ||||
3639 | /// Copy constructor for copying from a ConstraintInfo. | |||
3640 | AsmOperandInfo(InlineAsm::ConstraintInfo Info) | |||
3641 | : InlineAsm::ConstraintInfo(std::move(Info)) {} | |||
3642 | ||||
3643 | /// Return true if this is an input operand that is a matching constraint | |||
3644 | /// like "4". | |||
3645 | bool isMatchingInputConstraint() const; | |||
3646 | ||||
3647 | /// If this is an input matching constraint, this method returns the output | |||
3648 | /// operand it matches. | |||
3649 | unsigned getMatchedOperand() const; | |||
3650 | }; | |||
3651 | ||||
3652 | using AsmOperandInfoVector = std::vector<AsmOperandInfo>; | |||
3653 | ||||
3654 | /// Split up the constraint string from the inline assembly value into the | |||
3655 | /// specific constraints and their prefixes, and also tie in the associated | |||
3656 | /// operand values. If this returns an empty vector, and if the constraint | |||
3657 | /// string itself isn't empty, there was an error parsing. | |||
3658 | virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, | |||
3659 | const TargetRegisterInfo *TRI, | |||
3660 | ImmutableCallSite CS) const; | |||
3661 | ||||
3662 | /// Examine constraint type and operand type and determine a weight value. | |||
3663 | /// The operand object must already have been set up with the operand type. | |||
3664 | virtual ConstraintWeight getMultipleConstraintMatchWeight( | |||
3665 | AsmOperandInfo &info, int maIndex) const; | |||
3666 | ||||
3667 | /// Examine constraint string and operand type and determine a weight value. | |||
3668 | /// The operand object must already have been set up with the operand type. | |||
3669 | virtual ConstraintWeight getSingleConstraintMatchWeight( | |||
3670 | AsmOperandInfo &info, const char *constraint) const; | |||
3671 | ||||
3672 | /// Determines the constraint code and constraint type to use for the specific | |||
3673 | /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType. | |||
3674 | /// If the actual operand being passed in is available, it can be passed in as | |||
3675 | /// Op, otherwise an empty SDValue can be passed. | |||
3676 | virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, | |||
3677 | SDValue Op, | |||
3678 | SelectionDAG *DAG = nullptr) const; | |||
3679 | ||||
3680 | /// Given a constraint, return the type of constraint it is for this target. | |||
3681 | virtual ConstraintType getConstraintType(StringRef Constraint) const; | |||
3682 | ||||
3683 | /// Given a physical register constraint (e.g. {edx}), return the register | |||
3684 | /// number and the register class for the register. | |||
3685 | /// | |||
3686 | /// Given a register class constraint, like 'r', if this corresponds directly | |||
3687 | /// to an LLVM register class, return a register of 0 and the register class | |||
3688 | /// pointer. | |||
3689 | /// | |||
3690 | /// This should only be used for C_Register constraints. On error, this | |||
3691 | /// returns a register number of 0 and a null register class pointer. | |||
3692 | virtual std::pair<unsigned, const TargetRegisterClass *> | |||
3693 | getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, | |||
3694 | StringRef Constraint, MVT VT) const; | |||
3695 | ||||
3696 | virtual unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const { | |||
3697 | if (ConstraintCode == "i") | |||
3698 | return InlineAsm::Constraint_i; | |||
3699 | else if (ConstraintCode == "m") | |||
3700 | return InlineAsm::Constraint_m; | |||
3701 | return InlineAsm::Constraint_Unknown; | |||
3702 | } | |||
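// Illustrative override (hypothetical target that supports an extra memory
// constraint letter "Q"):
//
//   unsigned FooTargetLowering::getInlineAsmMemConstraint(
//       StringRef ConstraintCode) const {
//     if (ConstraintCode == "Q")
//       return InlineAsm::Constraint_Q;
//     return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
//   }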
3703 | ||||
3704 | /// Try to replace an X constraint, which matches anything, with another that | |||
3705 | /// has more specific requirements based on the type of the corresponding | |||
3706 | /// operand. This returns null if there is no replacement to make. | |||
3707 | virtual const char *LowerXConstraint(EVT ConstraintVT) const; | |||
3708 | ||||
3709 | /// Lower the specified operand into the Ops vector. If it is invalid, don't | |||
3710 | /// add anything to Ops. | |||
3711 | virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, | |||
3712 | std::vector<SDValue> &Ops, | |||
3713 | SelectionDAG &DAG) const; | |||
3714 | ||||
3715 | // Lower custom output constraints. If invalid, return SDValue(). | |||
3716 | virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, | |||
3717 | SDLoc DL, | |||
3718 | const AsmOperandInfo &OpInfo, | |||
3719 | SelectionDAG &DAG) const; | |||
3720 | ||||
3721 | //===--------------------------------------------------------------------===// | |||
3722 | // Div utility functions | |||
3723 | // | |||
3724 | SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, | |||
3725 | SmallVectorImpl<SDNode *> &Created) const; | |||
3726 | SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, | |||
3727 | SmallVectorImpl<SDNode *> &Created) const; | |||
3728 | ||||
3729 | /// Targets may override this function to provide custom SDIV lowering for | |||
3730 | /// power-of-2 denominators. If the target returns an empty SDValue, LLVM | |||
3731 | /// assumes SDIV is expensive and replaces it with a series of other integer | |||
3732 | /// operations. | |||
3733 | virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, | |||
3734 | SelectionDAG &DAG, | |||
3735 | SmallVectorImpl<SDNode *> &Created) const; | |||
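// Worked example of the replacement style (the standard magic-number
// division, shown for intuition; not a verbatim BuildSDIV trace): a signed
// i32 divide by 10 needs no divide instruction:
//   t = mulhs(x, 0x66666667)  // high half of the 64-bit product
//   q = sra(t, 2)
//   q = add(q, srl(x, 31))    // add 1 when the dividend is negative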
3736 | ||||
3737 | /// Indicate whether this target prefers to combine FDIVs with the same | |||
3738 | /// divisor. If the transform should never be done, return zero. If the | |||
3739 | /// transform should be done, return the minimum number of divisor uses | |||
3740 | /// that must exist. | |||
3741 | virtual unsigned combineRepeatedFPDivisors() const { | |||
3742 | return 0; | |||
3743 | } | |||
3744 | ||||
3745 | /// Hooks for building estimates in place of slower divisions and square | |||
3746 | /// roots. | |||
3747 | ||||
3748 | /// Return either a square root or its reciprocal estimate value for the input | |||
3749 | /// operand. | |||
3750 | /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or | |||
3751 | /// 'Enabled' as set by a potential default override attribute. | |||
3752 | /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson | |||
3753 | /// refinement iterations required to generate a sufficient (though not | |||
3754 | /// necessarily IEEE-754 compliant) estimate is returned in that parameter. | |||
3755 | /// The boolean UseOneConstNR output is used to select a Newton-Raphson | |||
3756 | /// algorithm implementation that uses either one or two constants. | |||
3757 | /// The boolean Reciprocal is used to select whether the estimate is for the | |||
3758 | /// square root of the input operand or the reciprocal of its square root. | |||
3759 | /// A target may choose to implement its own refinement within this function. | |||
3760 | /// If that's true, then return '0' as the number of RefinementSteps to avoid | |||
3761 | /// any further refinement of the estimate. | |||
3762 | /// An empty SDValue return means no estimate sequence can be created. | |||
3763 | virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, | |||
3764 | int Enabled, int &RefinementSteps, | |||
3765 | bool &UseOneConstNR, bool Reciprocal) const { | |||
3766 | return SDValue(); | |||
3767 | } | |||
3768 | ||||
3769 | /// Return a reciprocal estimate value for the input operand. | |||
3770 | /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or | |||
3771 | /// 'Enabled' as set by a potential default override attribute. | |||
3772 | /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson | |||
3773 | /// refinement iterations required to generate a sufficient (though not | |||
3774 | /// necessarily IEEE-754 compliant) estimate is returned in that parameter. | |||
3775 | /// A target may choose to implement its own refinement within this function. | |||
3776 | /// If that's true, then return '0' as the number of RefinementSteps to avoid | |||
3777 | /// any further refinement of the estimate. | |||
3778 | /// An empty SDValue return means no estimate sequence can be created. | |||
3779 | virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, | |||
3780 | int Enabled, int &RefinementSteps) const { | |||
3781 | return SDValue(); | |||
3782 | } | |||
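// Refinement background (illustrative): each Newton-Raphson step roughly
// doubles the number of correct bits. For a reciprocal estimate x of 1/d:
//   x' = x * (2 - d * x)
// and for a reciprocal square root estimate x of 1/sqrt(a):
//   x' = x * (1.5 - 0.5 * a * x * x)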
3783 | ||||
3784 | //===--------------------------------------------------------------------===// | |||
3785 | // Legalization utility functions | |||
3786 | // | |||
3787 | ||||
3788 | /// Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, | |||
3789 | /// respectively, each computing an n/2-bit part of the result. | |||
3790 | /// \param Result A vector that will be filled with the parts of the result | |||
3791 | /// in little-endian order. | |||
3792 | /// \param LL Low bits of the LHS of the MUL. You can use this parameter | |||
3793 | /// if you want to control how low bits are extracted from the LHS. | |||
3794 | /// \param LH High bits of the LHS of the MUL. See LL for meaning. | |||
3795 | /// \param RL Low bits of the RHS of the MUL. See LL for meaning. | |||
3796 | /// \param RH High bits of the RHS of the MUL. See LL for meaning. | |||
3797 | /// \returns true if the node has been expanded, false if it has not. | |||
3798 | bool expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, SDValue LHS, | |||
3799 | SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT, | |||
3800 | SelectionDAG &DAG, MulExpansionKind Kind, | |||
3801 | SDValue LL = SDValue(), SDValue LH = SDValue(), | |||
3802 | SDValue RL = SDValue(), SDValue RH = SDValue()) const; | |||
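// Decomposition behind the expansion (illustrative), with h = n/2,
// x = xh*2^h + xl and y = yh*2^h + yl:
//   x * y = (xh*yh)*2^(2h) + (xh*yl + xl*yh)*2^h + xl*yl
// The half-width partial products are summed with carries, and the parts
// land in Result in little-endian order.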
3803 | ||||
3804 | /// Expand a MUL into two nodes. One that computes the high bits of | |||
3805 | /// the result and one that computes the low bits. | |||
3806 | /// \param HiLoVT The value type to use for the Lo and Hi nodes. | |||
3807 | /// \param LL Low bits of the LHS of the MUL. You can use this parameter | |||
3808 | /// if you want to control how low bits are extracted from the LHS. | |||
3809 | /// \param LH High bits of the LHS of the MUL. See LL for meaning. | |||
3810 | /// \param RL Low bits of the RHS of the MUL. See LL for meaning. | |||
3811 | /// \param RH High bits of the RHS of the MUL. See LL for meaning. | |||
3812 | /// \returns true if the node has been expanded, false if it has not. | |||
3813 | bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, | |||
3814 | SelectionDAG &DAG, MulExpansionKind Kind, | |||
3815 | SDValue LL = SDValue(), SDValue LH = SDValue(), | |||
3816 | SDValue RL = SDValue(), SDValue RH = SDValue()) const; | |||
3817 | ||||
3818 | /// Expand funnel shift. | |||
3819 | /// \param N Node to expand | |||
3820 | /// \param Result output after conversion | |||
3821 | /// \returns True, if the expansion was successful, false otherwise | |||
3822 | bool expandFunnelShift(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; | |||
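// Expansion shape (illustrative), for bit width bw and a shift amount
// taken modulo bw:
//   fshl(x, y, z) --> (x << (z % bw)) | (y >> (bw - (z % bw)))
// with a select guarding the (z % bw) == 0 case, where the result is
// simply x and a shift by bw would be undefined.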
3823 | ||||
3824 | /// Expand rotations. | |||
3825 | /// \param N Node to expand | |||
3826 | /// \param Result output after conversion | |||
3827 | /// \returns True, if the expansion was successful, false otherwise | |||
3828 | bool expandROT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; | |||
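// Rotate expansion shape (illustrative), again modulo the bit width:
//   rotl(x, z) --> (x << (z % bw)) | (x >> ((bw - z) % bw))
// When z % bw == 0 both shifts degenerate to 0 and the OR yields x.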
3829 | ||||
3830 | /// Expand float(f32) to SINT(i64) conversion | |||
3831 | /// \param N Node to expand | |||
3832 | /// \param Result output after conversion | |||
3833 | /// \returns True, if the expansion was successful, false otherwise | |||
3834 | bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; | |||
3835 | ||||
3836 | /// Expand float to UINT conversion | |||
3837 | /// \param N Node to expand | |||
3838 | /// \param Result output after conversion | |||
3839 | /// \returns True, if the expansion was successful, false otherwise | |||
3840 | bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; | |||
3841 | ||||
3842 | /// Expand UINT(i64) to double(f64) conversion | |||
3843 | /// \param N Node to expand | |||
3844 | /// \param Result output after conversion | |||
3845 | /// \returns True, if the expansion was successful, false otherwise | |||
3846 | bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; | |||
3847 | ||||
3848 | /// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs. | |||
3849 | SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const; | |||
3850 | ||||
3851 | /// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes; | |||
3852 | /// vector nodes can only succeed if all operations are legal/custom. | |||
3853 | /// \param N Node to expand | |||
3854 | /// \param Result output after conversion | |||
3855 | /// \returns True, if the expansion was successful, false otherwise | |||
3856 | bool expandCTPOP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; | |||
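// Scalar expansion shape for i32 (the classic bit-parallel popcount,
// shown for intuition):
//   v = v - ((v >> 1) & 0x55555555);
//   v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
//   v = (v + (v >> 4)) & 0x0F0F0F0F;
//   v = (v * 0x01010101) >> 24;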
3857 | ||||
3858 | /// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Expands vector/scalar CTLZ nodes; | |||
3859 | /// vector nodes can only succeed if all operations are legal/custom. | |||
3860 | /// \param N Node to expand | |||
3861 | /// \param Result output after conversion | |||
3862 | /// \returns True, if the expansion was successful, false otherwise | |||
3863 | bool expandCTLZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; | |||
3864 | ||||
3865 | /// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes; | |||
3866 | /// vector nodes can only succeed if all operations are legal/custom. | |||
3867 | /// \param N Node to expand | |||
3868 | /// \param Result output after conversion | |||
3869 | /// \returns True, if the expansion was successful, false otherwise | |||
3870 | bool expandCTTZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; | |||
3871 | ||||
3872 | /// Expand ABS nodes. Expands vector/scalar ABS nodes; | |||
3873 | /// vector nodes can only succeed if all operations are legal/custom. | |||
3874 | /// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size)) | |||
3875 | /// \param N Node to expand | |||
3876 | /// \param Result output after conversion | |||
3877 | /// \returns True, if the expansion was successful, false otherwise | |||
3878 | bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; | |||
3879 | ||||
3880 | /// Turn load of vector type into a load of the individual elements. | |||
3881 | /// \param LD load to expand | |||
3882 | /// \returns MERGE_VALUEs of the scalar loads with their chains. | |||
3883 | SDValue scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const; | |||
3884 | ||||
3885 | /// Turn a store of a vector type into stores of the individual elements. | |||
3886 | /// \param ST Store with a vector value type | |||
3887 | /// \returns MERGE_VALUEs of the individual store chains. | |||
3888 | SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const; | |||
3889 | ||||
3890 | /// Expands an unaligned load to 2 half-size loads for an integer, and | |||
3891 | /// possibly more for vectors. | |||
3892 | std::pair<SDValue, SDValue> expandUnalignedLoad(LoadSDNode *LD, | |||
3893 | SelectionDAG &DAG) const; | |||
3894 | ||||
3895 | /// Expands an unaligned store to 2 half-size stores for integer values, and | |||
3896 | /// possibly more for vectors. | |||
3897 | SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const; | |||
3898 | ||||
3899 | /// Increments memory address \p Addr according to the type of the value | |||
3900 | /// \p DataVT that should be stored. If the data is stored in compressed | |||
3901 | /// form, the memory address should be incremented according to the number of | |||
3902 | /// the stored elements. This number is equal to the number of '1's bits | |||
3903 | /// in the \p Mask. | |||
3904 | /// \p DataVT is a vector type. \p Mask is a vector value. | |||
3905 | /// \p DataVT and \p Mask have the same number of vector elements. | |||
3906 | SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, | |||
3907 | EVT DataVT, SelectionDAG &DAG, | |||
3908 | bool IsCompressedMemory) const; | |||
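// Example (illustrative): for a compressed store of v4i32 with mask
// <1,0,1,1>, only three elements are actually written, so the address
// advances by 3 * 4 bytes instead of the full 16-byte vector size.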
3909 | ||||
3910 | /// Get a pointer to vector element \p Idx located in memory for a vector of | |||
3911 | /// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out of | |||
3912 | /// bounds the returned pointer is unspecified, but will be within the vector | |||
3913 | /// bounds. | |||
3914 | SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, | |||
3915 | SDValue Index) const; | |||
3916 | ||||
3917 | /// Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT. This | |||
3918 | /// method accepts integers as its arguments. | |||
3919 | SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const; | |||
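// Expansion shape for the unsigned add case (illustrative):
//   uadd.sat(a, b) --> sum = a + b; select(sum < a, UINT_MAX, sum)
// Signed variants instead select INT_MAX or INT_MIN when the signs of
// the operands and the raw sum prove overflow.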
3920 | ||||
3921 | /// Method for building the DAG expansion of ISD::SMULFIX. This method accepts | |||
3922 | /// integers as its arguments. | |||
3923 | SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const; | |||
3924 | ||||
3925 | /// Method for building the DAG expansion of ISD::[US]MULO. Returns whether | |||
3926 | /// expansion was successful and populates the Result and Overflow arguments. | |||
3927 | bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, | |||
3928 | SelectionDAG &DAG) const; | |||
3929 | ||||
3930 | /// Expand a VECREDUCE_* into an explicit calculation. If Count is specified, | |||
3931 | /// only the first Count elements of the vector are used. | |||
3932 | SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const; | |||
3933 | ||||
3934 | //===--------------------------------------------------------------------===// | |||
3935 | // Instruction Emitting Hooks | |||
3936 | // | |||
3937 | ||||
3938 | /// This method should be implemented by targets that mark instructions with | |||
3939 | /// the 'usesCustomInserter' flag. These instructions are special in various | |||
3940 | /// ways, which require special support to insert. The specified MachineInstr | |||
3941 | /// is created but not inserted into any basic blocks, and this method is | |||
3942 | /// called to expand it into a sequence of instructions, potentially also | |||
3943 | /// creating new basic blocks and control flow. | |||
3944 | /// As long as the returned basic block is different (i.e., we created a new | |||
3945 | /// one), the custom inserter is free to modify the rest of \p MBB. | |||
3946 | virtual MachineBasicBlock * | |||
3947 | EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const; | |||
3948 | ||||
3949 | /// This method should be implemented by targets that mark instructions with | |||
3950 | /// the 'hasPostISelHook' flag. These instructions must be adjusted after | |||
3951 | /// instruction selection by target hooks, e.g. to fill in optional defs for | |||
3952 | /// ARM 's' setting instructions. | |||
3953 | virtual void AdjustInstrPostInstrSelection(MachineInstr &MI, | |||
3954 | SDNode *Node) const; | |||
3955 | ||||
3956 | /// If this function returns true, SelectionDAGBuilder emits a | |||
3957 | /// LOAD_STACK_GUARD node when it is lowering Intrinsic::stackprotector. | |||
3958 | virtual bool useLoadStackGuardNode() const { | |||
3959 | return false; | |||
3960 | } | |||
3961 | ||||
3962 | virtual SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, | |||
3963 | const SDLoc &DL) const { | |||
3964 | llvm_unreachable("not implemented for this target"); | |||
3965 | } | |||
3966 | ||||
3967 | /// Lower TLS global address SDNode for target independent emulated TLS model. | |||
3968 | virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, | |||
3969 | SelectionDAG &DAG) const; | |||
3970 | ||||
3971 | /// Expands target-specific indirect branch for the case of JumpTable | |||
3972 | /// expansion. | |||
3973 | virtual SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value, SDValue Addr, | |||
3974 | SelectionDAG &DAG) const { | |||
3975 | return DAG.getNode(ISD::BRIND, dl, MVT::Other, Value, Addr); | |||
3976 | } | |||
3977 | ||||
3978 | // seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits))) | |||
3979 | // If we're comparing for equality to zero and isCtlzFast is true, expose the | |||
3980 | // fact that this can be implemented as a ctlz/srl pair, so that the dag | |||
3981 | // combiner can fold the new nodes. | |||
3982 | SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const; | |||
3983 | ||||
3984 | private: | |||
3985 | SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, | |||
3986 | const SDLoc &DL, DAGCombinerInfo &DCI) const; | |||
3987 | SDValue foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, | |||
3988 | const SDLoc &DL, DAGCombinerInfo &DCI) const; | |||
3989 | ||||
3990 | SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0, | |||
3991 | SDValue N1, ISD::CondCode Cond, | |||
3992 | DAGCombinerInfo &DCI, | |||
3993 | const SDLoc &DL) const; | |||
3994 | }; | |||
3995 | ||||
3996 | /// Given an LLVM IR type and return type attributes, compute the return value | |||
3997 | /// EVTs and flags, and optionally also the offsets, if the return value is | |||
3998 | /// being lowered to memory. | |||
3999 | void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, | |||
4000 | SmallVectorImpl<ISD::OutputArg> &Outs, | |||
4001 | const TargetLowering &TLI, const DataLayout &DL); | |||
4002 | ||||
4003 | } // end namespace llvm | |||
4004 | ||||
4005 | #endif // LLVM_CODEGEN_TARGETLOWERING_H |