File: | lib/CodeGen/InterleavedLoadCombinePass.cpp |
Warning: | line 965, column 31 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- InterleavedLoadCombine.cpp - Combine Interleaved Loads ---*- C++ -*-===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // \file | |||
10 | // | |||
11 | // This file defines the interleaved-load-combine pass. The pass searches for | |||
12 | // ShuffleVectorInstruction that execute interleaving loads. If a matching | |||
13 | // pattern is found, it adds a combined load and further instructions in a | |||
14 | // pattern that is detectable by InterleavedAccesPass. The old instructions are | |||
15 | // left dead to be removed later. The pass is specifically designed to be | |||
16 | // executed just before InterleavedAccesPass to find any left-over instances | |||
17 | // that are not detected within former passes. | |||
18 | // | |||
19 | //===----------------------------------------------------------------------===// | |||
20 | ||||
21 | #include "llvm/ADT/Statistic.h" | |||
22 | #include "llvm/Analysis/MemoryLocation.h" | |||
23 | #include "llvm/Analysis/MemorySSA.h" | |||
24 | #include "llvm/Analysis/MemorySSAUpdater.h" | |||
25 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" | |||
26 | #include "llvm/Analysis/TargetTransformInfo.h" | |||
27 | #include "llvm/CodeGen/Passes.h" | |||
28 | #include "llvm/CodeGen/TargetLowering.h" | |||
29 | #include "llvm/CodeGen/TargetPassConfig.h" | |||
30 | #include "llvm/CodeGen/TargetSubtargetInfo.h" | |||
31 | #include "llvm/IR/DataLayout.h" | |||
32 | #include "llvm/IR/Dominators.h" | |||
33 | #include "llvm/IR/Function.h" | |||
34 | #include "llvm/IR/Instructions.h" | |||
35 | #include "llvm/IR/LegacyPassManager.h" | |||
36 | #include "llvm/IR/Module.h" | |||
37 | #include "llvm/Pass.h" | |||
38 | #include "llvm/Support/Debug.h" | |||
39 | #include "llvm/Support/ErrorHandling.h" | |||
40 | #include "llvm/Support/raw_ostream.h" | |||
41 | #include "llvm/Target/TargetMachine.h" | |||
42 | ||||
43 | #include <algorithm> | |||
44 | #include <cassert> | |||
45 | #include <list> | |||
46 | ||||
47 | using namespace llvm; | |||
48 | ||||
49 | #define DEBUG_TYPE"interleaved-load-combine" "interleaved-load-combine" | |||
50 | ||||
51 | namespace { | |||
52 | ||||
53 | /// Statistic counter | |||
54 | STATISTIC(NumInterleavedLoadCombine, "Number of combined loads")static llvm::Statistic NumInterleavedLoadCombine = {"interleaved-load-combine" , "NumInterleavedLoadCombine", "Number of combined loads", {0 }, {false}}; | |||
55 | ||||
56 | /// Option to disable the pass | |||
57 | static cl::opt<bool> DisableInterleavedLoadCombine( | |||
58 | "disable-" DEBUG_TYPE"interleaved-load-combine", cl::init(false), cl::Hidden, | |||
59 | cl::desc("Disable combining of interleaved loads")); | |||
60 | ||||
61 | struct VectorInfo; | |||
62 | ||||
63 | struct InterleavedLoadCombineImpl { | |||
64 | public: | |||
65 | InterleavedLoadCombineImpl(Function &F, DominatorTree &DT, MemorySSA &MSSA, | |||
66 | TargetMachine &TM) | |||
67 | : F(F), DT(DT), MSSA(MSSA), | |||
68 | TLI(*TM.getSubtargetImpl(F)->getTargetLowering()), | |||
69 | TTI(TM.getTargetTransformInfo(F)) {} | |||
70 | ||||
71 | /// Scan the function for interleaved load candidates and execute the | |||
72 | /// replacement if applicable. | |||
73 | bool run(); | |||
74 | ||||
75 | private: | |||
76 | /// Function this pass is working on | |||
77 | Function &F; | |||
78 | ||||
79 | /// Dominator Tree Analysis | |||
80 | DominatorTree &DT; | |||
81 | ||||
82 | /// Memory Alias Analyses | |||
83 | MemorySSA &MSSA; | |||
84 | ||||
85 | /// Target Lowering Information | |||
86 | const TargetLowering &TLI; | |||
87 | ||||
88 | /// Target Transform Information | |||
89 | const TargetTransformInfo TTI; | |||
90 | ||||
91 | /// Find the instruction in sets LIs that dominates all others, return nullptr | |||
92 | /// if there is none. | |||
93 | LoadInst *findFirstLoad(const std::set<LoadInst *> &LIs); | |||
94 | ||||
95 | /// Replace interleaved load candidates. It does additional | |||
96 | /// analyses if this makes sense. Returns true on success and false | |||
97 | /// of nothing has been changed. | |||
98 | bool combine(std::list<VectorInfo> &InterleavedLoad, | |||
99 | OptimizationRemarkEmitter &ORE); | |||
100 | ||||
101 | /// Given a set of VectorInfo containing candidates for a given interleave | |||
102 | /// factor, find a set that represents a 'factor' interleaved load. | |||
103 | bool findPattern(std::list<VectorInfo> &Candidates, | |||
104 | std::list<VectorInfo> &InterleavedLoad, unsigned Factor, | |||
105 | const DataLayout &DL); | |||
106 | }; // InterleavedLoadCombine | |||
107 | ||||
108 | /// First Order Polynomial on an n-Bit Integer Value | |||
109 | /// | |||
110 | /// Polynomial(Value) = Value * B + A + E*2^(n-e) | |||
111 | /// | |||
112 | /// A and B are the coefficients. E*2^(n-e) is an error within 'e' most | |||
113 | /// significant bits. It is introduced if an exact computation cannot be proven | |||
114 | /// (e.q. division by 2). | |||
115 | /// | |||
116 | /// As part of this optimization multiple loads will be combined. It necessary | |||
117 | /// to prove that loads are within some relative offset to each other. This | |||
118 | /// class is used to prove relative offsets of values loaded from memory. | |||
119 | /// | |||
120 | /// Representing an integer in this form is sound since addition in two's | |||
121 | /// complement is associative (trivial) and multiplication distributes over the | |||
122 | /// addition (see Proof(1) in Polynomial::mul). Further, both operations | |||
123 | /// commute. | |||
124 | // | |||
125 | // Example: | |||
126 | // declare @fn(i64 %IDX, <4 x float>* %PTR) { | |||
127 | // %Pa1 = add i64 %IDX, 2 | |||
128 | // %Pa2 = lshr i64 %Pa1, 1 | |||
129 | // %Pa3 = getelementptr inbounds <4 x float>, <4 x float>* %PTR, i64 %Pa2 | |||
130 | // %Va = load <4 x float>, <4 x float>* %Pa3 | |||
131 | // | |||
132 | // %Pb1 = add i64 %IDX, 4 | |||
133 | // %Pb2 = lshr i64 %Pb1, 1 | |||
134 | // %Pb3 = getelementptr inbounds <4 x float>, <4 x float>* %PTR, i64 %Pb2 | |||
135 | // %Vb = load <4 x float>, <4 x float>* %Pb3 | |||
136 | // ... } | |||
137 | // | |||
138 | // The goal is to prove that two loads load consecutive addresses. | |||
139 | // | |||
140 | // In this case the polynomials are constructed by the following | |||
141 | // steps. | |||
142 | // | |||
143 | // The number tag #e specifies the error bits. | |||
144 | // | |||
145 | // Pa_0 = %IDX #0 | |||
146 | // Pa_1 = %IDX + 2 #0 | add 2 | |||
147 | // Pa_2 = %IDX/2 + 1 #1 | lshr 1 | |||
148 | // Pa_3 = %IDX/2 + 1 #1 | GEP, step signext to i64 | |||
149 | // Pa_4 = (%IDX/2)*16 + 16 #0 | GEP, multiply index by sizeof(4) for floats | |||
150 | // Pa_5 = (%IDX/2)*16 + 16 #0 | GEP, add offset of leading components | |||
151 | // | |||
152 | // Pb_0 = %IDX #0 | |||
153 | // Pb_1 = %IDX + 4 #0 | add 2 | |||
154 | // Pb_2 = %IDX/2 + 2 #1 | lshr 1 | |||
155 | // Pb_3 = %IDX/2 + 2 #1 | GEP, step signext to i64 | |||
156 | // Pb_4 = (%IDX/2)*16 + 32 #0 | GEP, multiply index by sizeof(4) for floats | |||
157 | // Pb_5 = (%IDX/2)*16 + 16 #0 | GEP, add offset of leading components | |||
158 | // | |||
159 | // Pb_5 - Pa_5 = 16 #0 | subtract to get the offset | |||
160 | // | |||
161 | // Remark: %PTR is not maintained within this class. So in this instance the | |||
162 | // offset of 16 can only be assumed if the pointers are equal. | |||
163 | // | |||
164 | class Polynomial { | |||
165 | /// Operations on B | |||
166 | enum BOps { | |||
167 | LShr, | |||
168 | Mul, | |||
169 | SExt, | |||
170 | Trunc, | |||
171 | }; | |||
172 | ||||
173 | /// Number of Error Bits e | |||
174 | unsigned ErrorMSBs; | |||
175 | ||||
176 | /// Value | |||
177 | Value *V; | |||
178 | ||||
179 | /// Coefficient B | |||
180 | SmallVector<std::pair<BOps, APInt>, 4> B; | |||
181 | ||||
182 | /// Coefficient A | |||
183 | APInt A; | |||
184 | ||||
185 | public: | |||
186 | Polynomial(Value *V) : ErrorMSBs((unsigned)-1), V(V), B(), A() { | |||
187 | IntegerType *Ty = dyn_cast<IntegerType>(V->getType()); | |||
188 | if (Ty) { | |||
189 | ErrorMSBs = 0; | |||
190 | this->V = V; | |||
191 | A = APInt(Ty->getBitWidth(), 0); | |||
192 | } | |||
193 | } | |||
194 | ||||
195 | Polynomial(const APInt &A, unsigned ErrorMSBs = 0) | |||
196 | : ErrorMSBs(ErrorMSBs), V(NULL__null), B(), A(A) {} | |||
197 | ||||
198 | Polynomial(unsigned BitWidth, uint64_t A, unsigned ErrorMSBs = 0) | |||
199 | : ErrorMSBs(ErrorMSBs), V(NULL__null), B(), A(BitWidth, A) {} | |||
200 | ||||
201 | Polynomial() : ErrorMSBs((unsigned)-1), V(NULL__null), B(), A() {} | |||
202 | ||||
203 | /// Increment and clamp the number of undefined bits. | |||
204 | void incErrorMSBs(unsigned amt) { | |||
205 | if (ErrorMSBs == (unsigned)-1) | |||
206 | return; | |||
207 | ||||
208 | ErrorMSBs += amt; | |||
209 | if (ErrorMSBs > A.getBitWidth()) | |||
210 | ErrorMSBs = A.getBitWidth(); | |||
211 | } | |||
212 | ||||
213 | /// Decrement and clamp the number of undefined bits. | |||
214 | void decErrorMSBs(unsigned amt) { | |||
215 | if (ErrorMSBs == (unsigned)-1) | |||
216 | return; | |||
217 | ||||
218 | if (ErrorMSBs > amt) | |||
219 | ErrorMSBs -= amt; | |||
220 | else | |||
221 | ErrorMSBs = 0; | |||
222 | } | |||
223 | ||||
224 | /// Apply an add on the polynomial | |||
225 | Polynomial &add(const APInt &C) { | |||
226 | // Note: Addition is associative in two's complement even when in case of | |||
227 | // signed overflow. | |||
228 | // | |||
229 | // Error bits can only propagate into higher significant bits. As these are | |||
230 | // already regarded as undefined, there is no change. | |||
231 | // | |||
232 | // Theorem: Adding a constant to a polynomial does not change the error | |||
233 | // term. | |||
234 | // | |||
235 | // Proof: | |||
236 | // | |||
237 | // Since the addition is associative and commutes: | |||
238 | // | |||
239 | // (B + A + E*2^(n-e)) + C = B + (A + C) + E*2^(n-e) | |||
240 | // [qed] | |||
241 | ||||
242 | if (C.getBitWidth() != A.getBitWidth()) { | |||
243 | ErrorMSBs = (unsigned)-1; | |||
244 | return *this; | |||
245 | } | |||
246 | ||||
247 | A += C; | |||
248 | return *this; | |||
249 | } | |||
250 | ||||
251 | /// Apply a multiplication onto the polynomial. | |||
252 | Polynomial &mul(const APInt &C) { | |||
253 | // Note: Multiplication distributes over the addition | |||
254 | // | |||
255 | // Theorem: Multiplication distributes over the addition | |||
256 | // | |||
257 | // Proof(1): | |||
258 | // | |||
259 | // (B+A)*C =- | |||
260 | // = (B + A) + (B + A) + .. {C Times} | |||
261 | // addition is associative and commutes, hence | |||
262 | // = B + B + .. {C Times} .. + A + A + .. {C times} | |||
263 | // = B*C + A*C | |||
264 | // (see (function add) for signed values and overflows) | |||
265 | // [qed] | |||
266 | // | |||
267 | // Theorem: If C has c trailing zeros, errors bits in A or B are shifted out | |||
268 | // to the left. | |||
269 | // | |||
270 | // Proof(2): | |||
271 | // | |||
272 | // Let B' and A' be the n-Bit inputs with some unknown errors EA, | |||
273 | // EB at e leading bits. B' and A' can be written down as: | |||
274 | // | |||
275 | // B' = B + 2^(n-e)*EB | |||
276 | // A' = A + 2^(n-e)*EA | |||
277 | // | |||
278 | // Let C' be an input with c trailing zero bits. C' can be written as | |||
279 | // | |||
280 | // C' = C*2^c | |||
281 | // | |||
282 | // Therefore we can compute the result by using distributivity and | |||
283 | // commutativity. | |||
284 | // | |||
285 | // (B'*C' + A'*C') = [B + 2^(n-e)*EB] * C' + [A + 2^(n-e)*EA] * C' = | |||
286 | // = [B + 2^(n-e)*EB + A + 2^(n-e)*EA] * C' = | |||
287 | // = (B'+A') * C' = | |||
288 | // = [B + 2^(n-e)*EB + A + 2^(n-e)*EA] * C' = | |||
289 | // = [B + A + 2^(n-e)*EB + 2^(n-e)*EA] * C' = | |||
290 | // = (B + A) * C' + [2^(n-e)*EB + 2^(n-e)*EA)] * C' = | |||
291 | // = (B + A) * C' + [2^(n-e)*EB + 2^(n-e)*EA)] * C*2^c = | |||
292 | // = (B + A) * C' + C*(EB + EA)*2^(n-e)*2^c = | |||
293 | // | |||
294 | // Let EC be the final error with EC = C*(EB + EA) | |||
295 | // | |||
296 | // = (B + A)*C' + EC*2^(n-e)*2^c = | |||
297 | // = (B + A)*C' + EC*2^(n-(e-c)) | |||
298 | // | |||
299 | // Since EC is multiplied by 2^(n-(e-c)) the resulting error contains c | |||
300 | // less error bits than the input. c bits are shifted out to the left. | |||
301 | // [qed] | |||
302 | ||||
303 | if (C.getBitWidth() != A.getBitWidth()) { | |||
304 | ErrorMSBs = (unsigned)-1; | |||
305 | return *this; | |||
306 | } | |||
307 | ||||
308 | // Multiplying by one is a no-op. | |||
309 | if (C.isOneValue()) { | |||
310 | return *this; | |||
311 | } | |||
312 | ||||
313 | // Multiplying by zero removes the coefficient B and defines all bits. | |||
314 | if (C.isNullValue()) { | |||
315 | ErrorMSBs = 0; | |||
316 | deleteB(); | |||
317 | } | |||
318 | ||||
319 | // See Proof(2): Trailing zero bits indicate a left shift. This removes | |||
320 | // leading bits from the result even if they are undefined. | |||
321 | decErrorMSBs(C.countTrailingZeros()); | |||
322 | ||||
323 | A *= C; | |||
324 | pushBOperation(Mul, C); | |||
325 | return *this; | |||
326 | } | |||
327 | ||||
328 | /// Apply a logical shift right on the polynomial | |||
329 | Polynomial &lshr(const APInt &C) { | |||
330 | // Theorem(1): (B + A + E*2^(n-e)) >> 1 => (B >> 1) + (A >> 1) + E'*2^(n-e') | |||
331 | // where | |||
332 | // e' = e + 1, | |||
333 | // E is a e-bit number, | |||
334 | // E' is a e'-bit number, | |||
335 | // holds under the following precondition: | |||
336 | // pre(1): A % 2 = 0 | |||
337 | // pre(2): e < n, (see Theorem(2) for the trivial case with e=n) | |||
338 | // where >> expresses a logical shift to the right, with adding zeros. | |||
339 | // | |||
340 | // We need to show that for every, E there is a E' | |||
341 | // | |||
342 | // B = b_h * 2^(n-1) + b_m * 2 + b_l | |||
343 | // A = a_h * 2^(n-1) + a_m * 2 (pre(1)) | |||
344 | // | |||
345 | // where a_h, b_h, b_l are single bits, and a_m, b_m are (n-2) bit numbers | |||
346 | // | |||
347 | // Let X = (B + A + E*2^(n-e)) >> 1 | |||
348 | // Let Y = (B >> 1) + (A >> 1) + E*2^(n-e) >> 1 | |||
349 | // | |||
350 | // X = [B + A + E*2^(n-e)] >> 1 = | |||
351 | // = [ b_h * 2^(n-1) + b_m * 2 + b_l + | |||
352 | // + a_h * 2^(n-1) + a_m * 2 + | |||
353 | // + E * 2^(n-e) ] >> 1 = | |||
354 | // | |||
355 | // The sum is built by putting the overflow of [a_m + b+n] into the term | |||
356 | // 2^(n-1). As there are no more bits beyond 2^(n-1) the overflow within | |||
357 | // this bit is discarded. This is expressed by % 2. | |||
358 | // | |||
359 | // The bit in position 0 cannot overflow into the term (b_m + a_m). | |||
360 | // | |||
361 | // = [ ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-1) + | |||
362 | // + ((b_m + a_m) % 2^(n-2)) * 2 + | |||
363 | // + b_l + E * 2^(n-e) ] >> 1 = | |||
364 | // | |||
365 | // The shift is computed by dividing the terms by 2 and by cutting off | |||
366 | // b_l. | |||
367 | // | |||
368 | // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) + | |||
369 | // + ((b_m + a_m) % 2^(n-2)) + | |||
370 | // + E * 2^(n-(e+1)) = | |||
371 | // | |||
372 | // by the definition in the Theorem e+1 = e' | |||
373 | // | |||
374 | // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) + | |||
375 | // + ((b_m + a_m) % 2^(n-2)) + | |||
376 | // + E * 2^(n-e') = | |||
377 | // | |||
378 | // Compute Y by applying distributivity first | |||
379 | // | |||
380 | // Y = (B >> 1) + (A >> 1) + E*2^(n-e') = | |||
381 | // = (b_h * 2^(n-1) + b_m * 2 + b_l) >> 1 + | |||
382 | // + (a_h * 2^(n-1) + a_m * 2) >> 1 + | |||
383 | // + E * 2^(n-e) >> 1 = | |||
384 | // | |||
385 | // Again, the shift is computed by dividing the terms by 2 and by cutting | |||
386 | // off b_l. | |||
387 | // | |||
388 | // = b_h * 2^(n-2) + b_m + | |||
389 | // + a_h * 2^(n-2) + a_m + | |||
390 | // + E * 2^(n-(e+1)) = | |||
391 | // | |||
392 | // Again, the sum is built by putting the overflow of [a_m + b+n] into | |||
393 | // the term 2^(n-1). But this time there is room for a second bit in the | |||
394 | // term 2^(n-2) we add this bit to a new term and denote it o_h in a | |||
395 | // second step. | |||
396 | // | |||
397 | // = ([b_h + a_h + (b_m + a_m) >> (n-2)] >> 1) * 2^(n-1) + | |||
398 | // + ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) + | |||
399 | // + ((b_m + a_m) % 2^(n-2)) + | |||
400 | // + E * 2^(n-(e+1)) = | |||
401 | // | |||
402 | // Let o_h = [b_h + a_h + (b_m + a_m) >> (n-2)] >> 1 | |||
403 | // Further replace e+1 by e'. | |||
404 | // | |||
405 | // = o_h * 2^(n-1) + | |||
406 | // + ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) + | |||
407 | // + ((b_m + a_m) % 2^(n-2)) + | |||
408 | // + E * 2^(n-e') = | |||
409 | // | |||
410 | // Move o_h into the error term and construct E'. To ensure that there is | |||
411 | // no 2^x with negative x, this step requires pre(2) (e < n). | |||
412 | // | |||
413 | // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) + | |||
414 | // + ((b_m + a_m) % 2^(n-2)) + | |||
415 | // + o_h * 2^(e'-1) * 2^(n-e') + | pre(2), move 2^(e'-1) | |||
416 | // | out of the old exponent | |||
417 | // + E * 2^(n-e') = | |||
418 | // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) + | |||
419 | // + ((b_m + a_m) % 2^(n-2)) + | |||
420 | // + [o_h * 2^(e'-1) + E] * 2^(n-e') + | move 2^(e'-1) out of | |||
421 | // | the old exponent | |||
422 | // | |||
423 | // Let E' = o_h * 2^(e'-1) + E | |||
424 | // | |||
425 | // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) + | |||
426 | // + ((b_m + a_m) % 2^(n-2)) + | |||
427 | // + E' * 2^(n-e') | |||
428 | // | |||
429 | // Because X and Y are distinct only in there error terms and E' can be | |||
430 | // constructed as shown the theorem holds. | |||
431 | // [qed] | |||
432 | // | |||
433 | // For completeness in case of the case e=n it is also required to show that | |||
434 | // distributivity can be applied. | |||
435 | // | |||
436 | // In this case Theorem(1) transforms to (the pre-condition on A can also be | |||
437 | // dropped) | |||
438 | // | |||
439 | // Theorem(2): (B + A + E) >> 1 => (B >> 1) + (A >> 1) + E' | |||
440 | // where | |||
441 | // A, B, E, E' are two's complement numbers with the same bit | |||
442 | // width | |||
443 | // | |||
444 | // Let A + B + E = X | |||
445 | // Let (B >> 1) + (A >> 1) = Y | |||
446 | // | |||
447 | // Therefore we need to show that for every X and Y there is an E' which | |||
448 | // makes the equation | |||
449 | // | |||
450 | // X = Y + E' | |||
451 | // | |||
452 | // hold. This is trivially the case for E' = X - Y. | |||
453 | // | |||
454 | // [qed] | |||
455 | // | |||
456 | // Remark: Distributing lshr with and arbitrary number n can be expressed as | |||
457 | // ((((B + A) lshr 1) lshr 1) ... ) {n times}. | |||
458 | // This construction induces n additional error bits at the left. | |||
459 | ||||
460 | if (C.getBitWidth() != A.getBitWidth()) { | |||
461 | ErrorMSBs = (unsigned)-1; | |||
462 | return *this; | |||
463 | } | |||
464 | ||||
465 | if (C.isNullValue()) | |||
466 | return *this; | |||
467 | ||||
468 | // Test if the result will be zero | |||
469 | unsigned shiftAmt = C.getZExtValue(); | |||
470 | if (shiftAmt >= C.getBitWidth()) | |||
471 | return mul(APInt(C.getBitWidth(), 0)); | |||
472 | ||||
473 | // The proof that shiftAmt LSBs are zero for at least one summand is only | |||
474 | // possible for the constant number. | |||
475 | // | |||
476 | // If this can be proven add shiftAmt to the error counter | |||
477 | // `ErrorMSBs`. Otherwise set all bits as undefined. | |||
478 | if (A.countTrailingZeros() < shiftAmt) | |||
479 | ErrorMSBs = A.getBitWidth(); | |||
480 | else | |||
481 | incErrorMSBs(shiftAmt); | |||
482 | ||||
483 | // Apply the operation. | |||
484 | pushBOperation(LShr, C); | |||
485 | A = A.lshr(shiftAmt); | |||
486 | ||||
487 | return *this; | |||
488 | } | |||
489 | ||||
490 | /// Apply a sign-extend or truncate operation on the polynomial. | |||
491 | Polynomial &sextOrTrunc(unsigned n) { | |||
492 | if (n < A.getBitWidth()) { | |||
493 | // Truncate: Clearly undefined Bits on the MSB side are removed | |||
494 | // if there are any. | |||
495 | decErrorMSBs(A.getBitWidth() - n); | |||
496 | A = A.trunc(n); | |||
497 | pushBOperation(Trunc, APInt(sizeof(n) * 8, n)); | |||
498 | } | |||
499 | if (n > A.getBitWidth()) { | |||
500 | // Extend: Clearly extending first and adding later is different | |||
501 | // to adding first and extending later in all extended bits. | |||
502 | incErrorMSBs(n - A.getBitWidth()); | |||
503 | A = A.sext(n); | |||
504 | pushBOperation(SExt, APInt(sizeof(n) * 8, n)); | |||
505 | } | |||
506 | ||||
507 | return *this; | |||
508 | } | |||
509 | ||||
510 | /// Test if there is a coefficient B. | |||
511 | bool isFirstOrder() const { return V != nullptr; } | |||
512 | ||||
513 | /// Test coefficient B of two Polynomials are equal. | |||
514 | bool isCompatibleTo(const Polynomial &o) const { | |||
515 | // The polynomial use different bit width. | |||
516 | if (A.getBitWidth() != o.A.getBitWidth()) | |||
517 | return false; | |||
518 | ||||
519 | // If neither Polynomial has the Coefficient B. | |||
520 | if (!isFirstOrder() && !o.isFirstOrder()) | |||
521 | return true; | |||
522 | ||||
523 | // The index variable is different. | |||
524 | if (V != o.V) | |||
525 | return false; | |||
526 | ||||
527 | // Check the operations. | |||
528 | if (B.size() != o.B.size()) | |||
529 | return false; | |||
530 | ||||
531 | auto ob = o.B.begin(); | |||
532 | for (auto &b : B) { | |||
533 | if (b != *ob) | |||
534 | return false; | |||
535 | ob++; | |||
536 | } | |||
537 | ||||
538 | return true; | |||
539 | } | |||
540 | ||||
541 | /// Subtract two polynomials, return an undefined polynomial if | |||
542 | /// subtraction is not possible. | |||
543 | Polynomial operator-(const Polynomial &o) const { | |||
544 | // Return an undefined polynomial if incompatible. | |||
545 | if (!isCompatibleTo(o)) | |||
546 | return Polynomial(); | |||
547 | ||||
548 | // If the polynomials are compatible (meaning they have the same | |||
549 | // coefficient on B), B is eliminated. Thus a polynomial solely | |||
550 | // containing A is returned | |||
551 | return Polynomial(A - o.A, std::max(ErrorMSBs, o.ErrorMSBs)); | |||
552 | } | |||
553 | ||||
554 | /// Subtract a constant from a polynomial, | |||
555 | Polynomial operator-(uint64_t C) const { | |||
556 | Polynomial Result(*this); | |||
557 | Result.A -= C; | |||
558 | return Result; | |||
559 | } | |||
560 | ||||
561 | /// Add a constant to a polynomial, | |||
562 | Polynomial operator+(uint64_t C) const { | |||
563 | Polynomial Result(*this); | |||
564 | Result.A += C; | |||
565 | return Result; | |||
566 | } | |||
567 | ||||
568 | /// Returns true if it can be proven that two Polynomials are equal. | |||
569 | bool isProvenEqualTo(const Polynomial &o) { | |||
570 | // Subtract both polynomials and test if it is fully defined and zero. | |||
571 | Polynomial r = *this - o; | |||
572 | return (r.ErrorMSBs == 0) && (!r.isFirstOrder()) && (r.A.isNullValue()); | |||
573 | } | |||
574 | ||||
575 | /// Print the polynomial into a stream. | |||
576 | void print(raw_ostream &OS) const { | |||
577 | OS << "[{#ErrBits:" << ErrorMSBs << "} "; | |||
578 | ||||
579 | if (V) { | |||
580 | for (auto b : B) | |||
581 | OS << "("; | |||
582 | OS << "(" << *V << ") "; | |||
583 | ||||
584 | for (auto b : B) { | |||
585 | switch (b.first) { | |||
586 | case LShr: | |||
587 | OS << "LShr "; | |||
588 | break; | |||
589 | case Mul: | |||
590 | OS << "Mul "; | |||
591 | break; | |||
592 | case SExt: | |||
593 | OS << "SExt "; | |||
594 | break; | |||
595 | case Trunc: | |||
596 | OS << "Trunc "; | |||
597 | break; | |||
598 | } | |||
599 | ||||
600 | OS << b.second << ") "; | |||
601 | } | |||
602 | } | |||
603 | ||||
604 | OS << "+ " << A << "]"; | |||
605 | } | |||
606 | ||||
607 | private: | |||
608 | void deleteB() { | |||
609 | V = nullptr; | |||
610 | B.clear(); | |||
611 | } | |||
612 | ||||
613 | void pushBOperation(const BOps Op, const APInt &C) { | |||
614 | if (isFirstOrder()) { | |||
615 | B.push_back(std::make_pair(Op, C)); | |||
616 | return; | |||
617 | } | |||
618 | } | |||
619 | }; | |||
620 | ||||
621 | #ifndef NDEBUG | |||
622 | static raw_ostream &operator<<(raw_ostream &OS, const Polynomial &S) { | |||
623 | S.print(OS); | |||
624 | return OS; | |||
625 | } | |||
626 | #endif | |||
627 | ||||
628 | /// VectorInfo stores abstract the following information for each vector | |||
629 | /// element: | |||
630 | /// | |||
631 | /// 1) The the memory address loaded into the element as Polynomial | |||
632 | /// 2) a set of load instruction necessary to construct the vector, | |||
633 | /// 3) a set of all other instructions that are necessary to create the vector and | |||
634 | /// 4) a pointer value that can be used as relative base for all elements. | |||
635 | struct VectorInfo { | |||
636 | private: | |||
637 | VectorInfo(const VectorInfo &c) : VTy(c.VTy) { | |||
638 | llvm_unreachable(::llvm::llvm_unreachable_internal("Copying VectorInfo is neither implemented nor necessary," , "/build/llvm-toolchain-snapshot-9~svn359426/lib/CodeGen/InterleavedLoadCombinePass.cpp" , 639) | |||
639 | "Copying VectorInfo is neither implemented nor necessary,")::llvm::llvm_unreachable_internal("Copying VectorInfo is neither implemented nor necessary," , "/build/llvm-toolchain-snapshot-9~svn359426/lib/CodeGen/InterleavedLoadCombinePass.cpp" , 639); | |||
640 | } | |||
641 | ||||
642 | public: | |||
643 | /// Information of a Vector Element | |||
644 | struct ElementInfo { | |||
645 | /// Offset Polynomial. | |||
646 | Polynomial Ofs; | |||
647 | ||||
648 | /// The Load Instruction used to Load the entry. LI is null if the pointer | |||
649 | /// of the load instruction does not point on to the entry | |||
650 | LoadInst *LI; | |||
651 | ||||
652 | ElementInfo(Polynomial Offset = Polynomial(), LoadInst *LI = nullptr) | |||
653 | : Ofs(Offset), LI(LI) {} | |||
654 | }; | |||
655 | ||||
656 | /// Basic-block the load instructions are within | |||
657 | BasicBlock *BB; | |||
658 | ||||
659 | /// Pointer value of all participation load instructions | |||
660 | Value *PV; | |||
661 | ||||
662 | /// Participating load instructions | |||
663 | std::set<LoadInst *> LIs; | |||
664 | ||||
665 | /// Participating instructions | |||
666 | std::set<Instruction *> Is; | |||
667 | ||||
668 | /// Final shuffle-vector instruction | |||
669 | ShuffleVectorInst *SVI; | |||
670 | ||||
671 | /// Information of the offset for each vector element | |||
672 | ElementInfo *EI; | |||
673 | ||||
674 | /// Vector Type | |||
675 | VectorType *const VTy; | |||
676 | ||||
677 | VectorInfo(VectorType *VTy) | |||
678 | : BB(nullptr), PV(nullptr), LIs(), Is(), SVI(nullptr), VTy(VTy) { | |||
679 | EI = new ElementInfo[VTy->getNumElements()]; | |||
680 | } | |||
681 | ||||
682 | virtual ~VectorInfo() { delete[] EI; } | |||
683 | ||||
684 | unsigned getDimension() const { return VTy->getNumElements(); } | |||
685 | ||||
686 | /// Test if the VectorInfo can be part of an interleaved load with the | |||
687 | /// specified factor. | |||
688 | /// | |||
689 | /// \param Factor of the interleave | |||
690 | /// \param DL Targets Datalayout | |||
691 | /// | |||
692 | /// \returns true if this is possible and false if not | |||
693 | bool isInterleaved(unsigned Factor, const DataLayout &DL) const { | |||
694 | unsigned Size = DL.getTypeAllocSize(VTy->getElementType()); | |||
695 | for (unsigned i = 1; i < getDimension(); i++) { | |||
696 | if (!EI[i].Ofs.isProvenEqualTo(EI[0].Ofs + i * Factor * Size)) { | |||
697 | return false; | |||
698 | } | |||
699 | } | |||
700 | return true; | |||
701 | } | |||
702 | ||||
703 | /// Recursively computes the vector information stored in V. | |||
704 | /// | |||
705 | /// This function delegates the work to specialized implementations | |||
706 | /// | |||
707 | /// \param V Value to operate on | |||
708 | /// \param Result Result of the computation | |||
709 | /// | |||
710 | /// \returns false if no sensible information can be gathered. | |||
711 | static bool compute(Value *V, VectorInfo &Result, const DataLayout &DL) { | |||
712 | ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V); | |||
713 | if (SVI) | |||
| ||||
714 | return computeFromSVI(SVI, Result, DL); | |||
715 | LoadInst *LI = dyn_cast<LoadInst>(V); | |||
716 | if (LI) | |||
717 | return computeFromLI(LI, Result, DL); | |||
718 | BitCastInst *BCI = dyn_cast<BitCastInst>(V); | |||
719 | if (BCI) | |||
720 | return computeFromBCI(BCI, Result, DL); | |||
721 | return false; | |||
722 | } | |||
723 | ||||
724 | /// BitCastInst specialization to compute the vector information. | |||
725 | /// | |||
726 | /// \param BCI BitCastInst to operate on | |||
727 | /// \param Result Result of the computation | |||
728 | /// | |||
729 | /// \returns false if no sensible information can be gathered. | |||
730 | static bool computeFromBCI(BitCastInst *BCI, VectorInfo &Result, | |||
731 | const DataLayout &DL) { | |||
732 | Instruction *Op = dyn_cast<Instruction>(BCI->getOperand(0)); | |||
733 | ||||
734 | if (!Op) | |||
735 | return false; | |||
736 | ||||
737 | VectorType *VTy = dyn_cast<VectorType>(Op->getType()); | |||
738 | if (!VTy) | |||
739 | return false; | |||
740 | ||||
741 | // We can only cast from large to smaller vectors | |||
742 | if (Result.VTy->getNumElements() % VTy->getNumElements()) | |||
743 | return false; | |||
744 | ||||
745 | unsigned Factor = Result.VTy->getNumElements() / VTy->getNumElements(); | |||
746 | unsigned NewSize = DL.getTypeAllocSize(Result.VTy->getElementType()); | |||
747 | unsigned OldSize = DL.getTypeAllocSize(VTy->getElementType()); | |||
748 | ||||
749 | if (NewSize * Factor != OldSize) | |||
750 | return false; | |||
751 | ||||
752 | VectorInfo Old(VTy); | |||
753 | if (!compute(Op, Old, DL)) | |||
754 | return false; | |||
755 | ||||
756 | for (unsigned i = 0; i < Result.VTy->getNumElements(); i += Factor) { | |||
757 | for (unsigned j = 0; j < Factor; j++) { | |||
758 | Result.EI[i + j] = | |||
759 | ElementInfo(Old.EI[i / Factor].Ofs + j * NewSize, | |||
760 | j == 0 ? Old.EI[i / Factor].LI : nullptr); | |||
761 | } | |||
762 | } | |||
763 | ||||
764 | Result.BB = Old.BB; | |||
765 | Result.PV = Old.PV; | |||
766 | Result.LIs.insert(Old.LIs.begin(), Old.LIs.end()); | |||
767 | Result.Is.insert(Old.Is.begin(), Old.Is.end()); | |||
768 | Result.Is.insert(BCI); | |||
769 | Result.SVI = nullptr; | |||
770 | ||||
771 | return true; | |||
772 | } | |||
773 | ||||
/// ShuffleVectorInst specialization to compute vector information.
///
/// \param SVI ShuffleVectorInst to operate on
/// \param Result Result of the computation
///
/// Compute the left and the right side vector information and merge them by
/// applying the shuffle operation. This function also ensures that the left
/// and right side have compatible loads. This means that all loads are with
/// in the same basic block and are based on the same pointer.
///
/// \returns false if no sensible information can be gathered.
static bool computeFromSVI(ShuffleVectorInst *SVI, VectorInfo &Result,
                           const DataLayout &DL) {
  VectorType *ArgTy = dyn_cast<VectorType>(SVI->getOperand(0)->getType());
  assert(ArgTy && "ShuffleVector Operand is not a VectorType")((ArgTy && "ShuffleVector Operand is not a VectorType" ) ? static_cast<void> (0) : __assert_fail ("ArgTy && \"ShuffleVector Operand is not a VectorType\"" , "/build/llvm-toolchain-snapshot-9~svn359426/lib/CodeGen/InterleavedLoadCombinePass.cpp" , 788, __PRETTY_FUNCTION__));

  // Compute the left hand vector information.
  // A failed recursion is recorded as BB == nullptr; the merge logic below
  // treats such a side as "no information" rather than failing outright.
  VectorInfo LHS(ArgTy);
  if (!compute(SVI->getOperand(0), LHS, DL))
    LHS.BB = nullptr;

  // Compute the right hand vector information.
  VectorInfo RHS(ArgTy);
  if (!compute(SVI->getOperand(1), RHS, DL))
    RHS.BB = nullptr;

  // Neither operand produced sensible results?
  if (!LHS.BB && !RHS.BB)
    return false;
  // Only RHS produced sensible results?
  else if (!LHS.BB) {
    Result.BB = RHS.BB;
    Result.PV = RHS.PV;
  }
  // Only LHS produced sensible results?
  else if (!RHS.BB) {
    Result.BB = LHS.BB;
    Result.PV = LHS.PV;
  }
  // Both operands produced sensible results? They are only compatible when
  // they live in the same basic block and are based on the same pointer.
  else if ((LHS.BB == RHS.BB) && (LHS.PV == RHS.PV)) {
    Result.BB = LHS.BB;
    Result.PV = LHS.PV;
  }
  // Both operands produced sensible results but they are incompatible.
  else {
    return false;
  }

  // Merge and apply the operation on the offset information.
  if (LHS.BB) {
    Result.LIs.insert(LHS.LIs.begin(), LHS.LIs.end());
    Result.Is.insert(LHS.Is.begin(), LHS.Is.end());
  }
  if (RHS.BB) {
    Result.LIs.insert(RHS.LIs.begin(), RHS.LIs.end());
    Result.Is.insert(RHS.Is.begin(), RHS.Is.end());
  }
  Result.Is.insert(SVI);
  Result.SVI = SVI;

  // Translate each shuffle mask index into the ElementInfo of the operand it
  // selects from. Undef lanes (negative indices) and lanes taken from a side
  // without information become empty ElementInfos.
  int j = 0;
  for (int i : SVI->getShuffleMask()) {
    assert((i < 2 * (signed)ArgTy->getNumElements()) &&(((i < 2 * (signed)ArgTy->getNumElements()) && "Invalid ShuffleVectorInst (index out of bounds)" ) ? static_cast<void> (0) : __assert_fail ("(i < 2 * (signed)ArgTy->getNumElements()) && \"Invalid ShuffleVectorInst (index out of bounds)\"" , "/build/llvm-toolchain-snapshot-9~svn359426/lib/CodeGen/InterleavedLoadCombinePass.cpp" , 838, __PRETTY_FUNCTION__))
           "Invalid ShuffleVectorInst (index out of bounds)")(((i < 2 * (signed)ArgTy->getNumElements()) && "Invalid ShuffleVectorInst (index out of bounds)" ) ? static_cast<void> (0) : __assert_fail ("(i < 2 * (signed)ArgTy->getNumElements()) && \"Invalid ShuffleVectorInst (index out of bounds)\"" , "/build/llvm-toolchain-snapshot-9~svn359426/lib/CodeGen/InterleavedLoadCombinePass.cpp" , 838, __PRETTY_FUNCTION__));

    if (i < 0)
      Result.EI[j] = ElementInfo();
    else if (i < (signed)ArgTy->getNumElements()) {
      // Index selects from the left operand.
      if (LHS.BB)
        Result.EI[j] = LHS.EI[i];
      else
        Result.EI[j] = ElementInfo();
    } else {
      // Index selects from the right operand; rebase into RHS's range.
      if (RHS.BB)
        Result.EI[j] = RHS.EI[i - ArgTy->getNumElements()];
      else
        Result.EI[j] = ElementInfo();
    }
    j++;
  }

  return true;
}
858 | ||||
859 | /// LoadInst specialization to compute vector information. | |||
860 | /// | |||
861 | /// This function also acts as abort condition to the recursion. | |||
862 | /// | |||
863 | /// \param LI LoadInst to operate on | |||
864 | /// \param Result Result of the computation | |||
865 | /// | |||
866 | /// \returns false if no sensible information can be gathered. | |||
867 | static bool computeFromLI(LoadInst *LI, VectorInfo &Result, | |||
868 | const DataLayout &DL) { | |||
869 | Value *BasePtr; | |||
870 | Polynomial Offset; | |||
871 | ||||
872 | if (LI->isVolatile()) | |||
873 | return false; | |||
874 | ||||
875 | if (LI->isAtomic()) | |||
876 | return false; | |||
877 | ||||
878 | // Get the base polynomial | |||
879 | computePolynomialFromPointer(*LI->getPointerOperand(), Offset, BasePtr, DL); | |||
880 | ||||
881 | Result.BB = LI->getParent(); | |||
882 | Result.PV = BasePtr; | |||
883 | Result.LIs.insert(LI); | |||
884 | Result.Is.insert(LI); | |||
885 | ||||
886 | for (unsigned i = 0; i < Result.getDimension(); i++) { | |||
887 | Value *Idx[2] = { | |||
888 | ConstantInt::get(Type::getInt32Ty(LI->getContext()), 0), | |||
889 | ConstantInt::get(Type::getInt32Ty(LI->getContext()), i), | |||
890 | }; | |||
891 | int64_t Ofs = DL.getIndexedOffsetInType(Result.VTy, makeArrayRef(Idx, 2)); | |||
892 | Result.EI[i] = ElementInfo(Offset + Ofs, i == 0 ? LI : nullptr); | |||
893 | } | |||
894 | ||||
895 | return true; | |||
896 | } | |||
897 | ||||
898 | /// Recursively compute polynomial of a value. | |||
899 | /// | |||
900 | /// \param BO Input binary operation | |||
901 | /// \param Result Result polynomial | |||
902 | static void computePolynomialBinOp(BinaryOperator &BO, Polynomial &Result) { | |||
903 | Value *LHS = BO.getOperand(0); | |||
904 | Value *RHS = BO.getOperand(1); | |||
905 | ||||
906 | // Find the RHS Constant if any | |||
907 | ConstantInt *C = dyn_cast<ConstantInt>(RHS); | |||
908 | if ((!C) && BO.isCommutative()) { | |||
909 | C = dyn_cast<ConstantInt>(LHS); | |||
910 | if (C) | |||
911 | std::swap(LHS, RHS); | |||
912 | } | |||
913 | ||||
914 | switch (BO.getOpcode()) { | |||
915 | case Instruction::Add: | |||
916 | if (!C) | |||
917 | break; | |||
918 | ||||
919 | computePolynomial(*LHS, Result); | |||
920 | Result.add(C->getValue()); | |||
921 | return; | |||
922 | ||||
923 | case Instruction::LShr: | |||
924 | if (!C) | |||
925 | break; | |||
926 | ||||
927 | computePolynomial(*LHS, Result); | |||
928 | Result.lshr(C->getValue()); | |||
929 | return; | |||
930 | ||||
931 | default: | |||
932 | break; | |||
933 | } | |||
934 | ||||
935 | Result = Polynomial(&BO); | |||
936 | } | |||
937 | ||||
938 | /// Recursively compute polynomial of a value | |||
939 | /// | |||
940 | /// \param V input value | |||
941 | /// \param Result result polynomial | |||
942 | static void computePolynomial(Value &V, Polynomial &Result) { | |||
943 | if (isa<BinaryOperator>(&V)) | |||
944 | computePolynomialBinOp(*dyn_cast<BinaryOperator>(&V), Result); | |||
945 | else | |||
946 | Result = Polynomial(&V); | |||
947 | } | |||
948 | ||||
949 | /// Compute the Polynomial representation of a Pointer type. | |||
950 | /// | |||
951 | /// \param Ptr input pointer value | |||
952 | /// \param Result result polynomial | |||
953 | /// \param BasePtr pointer the polynomial is based on | |||
954 | /// \param DL Datalayout of the target machine | |||
955 | static void computePolynomialFromPointer(Value &Ptr, Polynomial &Result, | |||
956 | Value *&BasePtr, | |||
957 | const DataLayout &DL) { | |||
958 | // Not a pointer type? Return an undefined polynomial | |||
959 | PointerType *PtrTy = dyn_cast<PointerType>(Ptr.getType()); | |||
960 | if (!PtrTy) { | |||
961 | Result = Polynomial(); | |||
962 | BasePtr = nullptr; | |||
963 | } | |||
964 | unsigned PointerBits = | |||
965 | DL.getIndexSizeInBits(PtrTy->getPointerAddressSpace()); | |||
| ||||
966 | ||||
967 | /// Skip pointer casts. Return Zero polynomial otherwise | |||
968 | if (isa<CastInst>(&Ptr)) { | |||
969 | CastInst &CI = *cast<CastInst>(&Ptr); | |||
970 | switch (CI.getOpcode()) { | |||
971 | case Instruction::BitCast: | |||
972 | computePolynomialFromPointer(*CI.getOperand(0), Result, BasePtr, DL); | |||
973 | break; | |||
974 | default: | |||
975 | BasePtr = &Ptr; | |||
976 | Polynomial(PointerBits, 0); | |||
977 | break; | |||
978 | } | |||
979 | } | |||
980 | /// Resolve GetElementPtrInst. | |||
981 | else if (isa<GetElementPtrInst>(&Ptr)) { | |||
982 | GetElementPtrInst &GEP = *cast<GetElementPtrInst>(&Ptr); | |||
983 | ||||
984 | APInt BaseOffset(PointerBits, 0); | |||
985 | ||||
986 | // Check if we can compute the Offset with accumulateConstantOffset | |||
987 | if (GEP.accumulateConstantOffset(DL, BaseOffset)) { | |||
988 | Result = Polynomial(BaseOffset); | |||
989 | BasePtr = GEP.getPointerOperand(); | |||
990 | return; | |||
991 | } else { | |||
992 | // Otherwise we allow that the last index operand of the GEP is | |||
993 | // non-constant. | |||
994 | unsigned idxOperand, e; | |||
995 | SmallVector<Value *, 4> Indices; | |||
996 | for (idxOperand = 1, e = GEP.getNumOperands(); idxOperand < e; | |||
997 | idxOperand++) { | |||
998 | ConstantInt *IDX = dyn_cast<ConstantInt>(GEP.getOperand(idxOperand)); | |||
999 | if (!IDX) | |||
1000 | break; | |||
1001 | Indices.push_back(IDX); | |||
1002 | } | |||
1003 | ||||
1004 | // It must also be the last operand. | |||
1005 | if (idxOperand + 1 != e) { | |||
1006 | Result = Polynomial(); | |||
1007 | BasePtr = nullptr; | |||
1008 | return; | |||
1009 | } | |||
1010 | ||||
1011 | // Compute the polynomial of the index operand. | |||
1012 | computePolynomial(*GEP.getOperand(idxOperand), Result); | |||
1013 | ||||
1014 | // Compute base offset from zero based index, excluding the last | |||
1015 | // variable operand. | |||
1016 | BaseOffset = | |||
1017 | DL.getIndexedOffsetInType(GEP.getSourceElementType(), Indices); | |||
1018 | ||||
1019 | // Apply the operations of GEP to the polynomial. | |||
1020 | unsigned ResultSize = DL.getTypeAllocSize(GEP.getResultElementType()); | |||
1021 | Result.sextOrTrunc(PointerBits); | |||
1022 | Result.mul(APInt(PointerBits, ResultSize)); | |||
1023 | Result.add(BaseOffset); | |||
1024 | BasePtr = GEP.getPointerOperand(); | |||
1025 | } | |||
1026 | } | |||
1027 | // All other instructions are handled by using the value as base pointer and | |||
1028 | // a zero polynomial. | |||
1029 | else { | |||
1030 | BasePtr = &Ptr; | |||
1031 | Polynomial(DL.getIndexSizeInBits(PtrTy->getPointerAddressSpace()), 0); | |||
1032 | } | |||
1033 | } | |||
1034 | ||||
1035 | #ifndef NDEBUG | |||
1036 | void print(raw_ostream &OS) const { | |||
1037 | if (PV) | |||
1038 | OS << *PV; | |||
1039 | else | |||
1040 | OS << "(none)"; | |||
1041 | OS << " + "; | |||
1042 | for (unsigned i = 0; i < getDimension(); i++) | |||
1043 | OS << ((i == 0) ? "[" : ", ") << EI[i].Ofs; | |||
1044 | OS << "]"; | |||
1045 | } | |||
1046 | #endif | |||
1047 | }; | |||
1048 | ||||
1049 | } // anonymous namespace | |||
1050 | ||||
// Search Candidates for a group of Factor VectorInfos that together form one
// interleaved load: all members share the same vector type, basic block and
// base pointer, and their lane-0 offsets are provably spaced exactly one
// element size apart. On success the group is spliced out of Candidates into
// InterleavedLoad (in offset order) and true is returned.
bool InterleavedLoadCombineImpl::findPattern(
    std::list<VectorInfo> &Candidates, std::list<VectorInfo> &InterleavedLoad,
    unsigned Factor, const DataLayout &DL) {
  for (auto C0 = Candidates.begin(), E0 = Candidates.end(); C0 != E0; ++C0) {
    unsigned i;
    // Try to find an interleaved load using the front of Worklist as first line
    unsigned Size = DL.getTypeAllocSize(C0->VTy->getElementType());

    // List containing iterators pointing to the VectorInfos of the candidates
    std::vector<std::list<VectorInfo>::iterator> Res(Factor, Candidates.end());

    for (auto C = Candidates.begin(), E = Candidates.end(); C != E; C++) {
      if (C->VTy != C0->VTy)
        continue;
      if (C->BB != C0->BB)
        continue;
      if (C->PV != C0->PV)
        continue;

      // Check the current value matches any of factor - 1 remaining lines
      for (i = 1; i < Factor; i++) {
        if (C->EI[0].Ofs.isProvenEqualTo(C0->EI[0].Ofs + i * Size)) {
          Res[i] = C;
        }
      }

      // Stop once every partner line has been found. Note that Res
      // accumulates across iterations of this loop, so each Res[i] may have
      // been filled in by a different candidate C.
      for (i = 1; i < Factor; i++) {
        if (Res[i] == Candidates.end())
          break;
      }
      if (i == Factor) {
        Res[0] = C0;
        break;
      }
    }

    if (Res[0] != Candidates.end()) {
      // Move the result into the output
      for (unsigned i = 0; i < Factor; i++) {
        InterleavedLoad.splice(InterleavedLoad.end(), Candidates, Res[i]);
      }

      return true;
    }
  }
  return false;
}
1098 | ||||
// Return the member of \p LIs that appears first in program order. All loads
// are required (and asserted) to live in the same basic block.
LoadInst *
InterleavedLoadCombineImpl::findFirstLoad(const std::set<LoadInst *> &LIs) {
  assert(!LIs.empty() && "No load instructions given.")((!LIs.empty() && "No load instructions given.") ? static_cast <void> (0) : __assert_fail ("!LIs.empty() && \"No load instructions given.\"" , "/build/llvm-toolchain-snapshot-9~svn359426/lib/CodeGen/InterleavedLoadCombinePass.cpp" , 1101, __PRETTY_FUNCTION__));

  // All LIs are within the same BB. Select the first for a reference.
  BasicBlock *BB = (*LIs.begin())->getParent();
  // Walk the block front-to-back and stop at the first instruction that is a
  // member of the set.
  BasicBlock::iterator FLI =
      std::find_if(BB->begin(), BB->end(), [&LIs](Instruction &I) -> bool {
        return is_contained(LIs, &I);
      });
  assert(FLI != BB->end())((FLI != BB->end()) ? static_cast<void> (0) : __assert_fail ("FLI != BB->end()", "/build/llvm-toolchain-snapshot-9~svn359426/lib/CodeGen/InterleavedLoadCombinePass.cpp" , 1109, __PRETTY_FUNCTION__));

  return cast<LoadInst>(FLI);
}
1113 | ||||
// Replace the Factor ShuffleVectorInsts of \p InterleavedLoad with a single
// wide load followed by strided shuffles -- the exact pattern
// InterleavedAccessPass recognizes. Returns false (IR untouched) if any
// legality or profitability check fails; the replaced instructions are left
// dead for later cleanup.
bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
                                         OptimizationRemarkEmitter &ORE) {
  LLVM_DEBUG(dbgs() << "Checking interleaved load\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("interleaved-load-combine")) { dbgs() << "Checking interleaved load\n" ; } } while (false);

  // The insertion point is the LoadInst which loads the first values. The
  // following tests are used to prove that the combined load can be inserted
  // just before InsertionPoint.
  LoadInst *InsertionPoint = InterleavedLoad.front().EI[0].LI;

  // Test if the offset is computed
  if (!InsertionPoint)
    return false;

  std::set<LoadInst *> LIs;
  std::set<Instruction *> Is;
  std::set<Instruction *> SVIs;

  unsigned InterleavedCost;
  unsigned InstructionCost = 0;

  // Get the interleave factor
  unsigned Factor = InterleavedLoad.size();

  // Merge all input sets used in analysis
  for (auto &VI : InterleavedLoad) {
    // Generate a set of all load instructions to be combined
    LIs.insert(VI.LIs.begin(), VI.LIs.end());

    // Generate a set of all instructions taking part in load
    // interleaved. This list excludes the instructions necessary for the
    // polynomial construction.
    Is.insert(VI.Is.begin(), VI.Is.end());

    // Generate the set of the final ShuffleVectorInst.
    SVIs.insert(VI.SVI);
  }

  // There is nothing to combine.
  if (LIs.size() < 2)
    return false;

  // Test if all participating instruction will be dead after the
  // transformation. If intermediate results are used, no performance gain can
  // be expected. Also sum the cost of the Instructions being left dead.
  for (auto &I : Is) {
    // Compute the old cost
    InstructionCost +=
        TTI.getInstructionCost(I, TargetTransformInfo::TCK_Latency);

    // The final SVIs are allowed not to be dead, all uses will be replaced
    if (SVIs.find(I) != SVIs.end())
      continue;

    // If there are users outside the set to be eliminated, we abort the
    // transformation. No gain can be expected.
    for (const auto &U : I->users()) {
      if (Is.find(dyn_cast<Instruction>(U)) == Is.end())
        return false;
    }
  }

  // We know that all LoadInst are within the same BB. This guarantees that
  // either everything or nothing is loaded.
  LoadInst *First = findFirstLoad(LIs);

  // To be safe that the loads can be combined, iterate over all loads and test
  // that the corresponding defining access dominates first LI. This guarantees
  // that there are no aliasing stores in between the loads.
  auto FMA = MSSA.getMemoryAccess(First);
  for (auto LI : LIs) {
    auto MADef = MSSA.getMemoryAccess(LI)->getDefiningAccess();
    if (!MSSA.dominates(MADef, FMA))
      return false;
  }
  assert(!LIs.empty() && "There are no LoadInst to combine")((!LIs.empty() && "There are no LoadInst to combine") ? static_cast<void> (0) : __assert_fail ("!LIs.empty() && \"There are no LoadInst to combine\"" , "/build/llvm-toolchain-snapshot-9~svn359426/lib/CodeGen/InterleavedLoadCombinePass.cpp" , 1188, __PRETTY_FUNCTION__));

  // It is necessary that insertion point dominates all final ShuffleVectorInst.
  for (auto &VI : InterleavedLoad) {
    if (!DT.dominates(InsertionPoint, VI.SVI))
      return false;
  }

  // All checks are done. Add instructions detectable by InterleavedAccessPass
  // The old instructions are left dead.
  IRBuilder<> Builder(InsertionPoint);
  Type *ETy = InterleavedLoad.front().SVI->getType()->getElementType();
  unsigned ElementsPerSVI =
      InterleavedLoad.front().SVI->getType()->getNumElements();
  VectorType *ILTy = VectorType::get(ETy, Factor * ElementsPerSVI);

  // Estimate the cost of the wide interleaved load and compare it against the
  // summed latency of the instructions it would replace; bail out unless it
  // is strictly cheaper.
  SmallVector<unsigned, 4> Indices;
  for (unsigned i = 0; i < Factor; i++)
    Indices.push_back(i);
  InterleavedCost = TTI.getInterleavedMemoryOpCost(
      Instruction::Load, ILTy, Factor, Indices, InsertionPoint->getAlignment(),
      InsertionPoint->getPointerAddressSpace());

  if (InterleavedCost >= InstructionCost) {
    return false;
  }

  // Create a pointer cast for the wide load.
  auto CI = Builder.CreatePointerCast(InsertionPoint->getOperand(0),
                                      ILTy->getPointerTo(),
                                      "interleaved.wide.ptrcast");

  // Create the wide load and update the MemorySSA.
  auto LI = Builder.CreateAlignedLoad(ILTy, CI, InsertionPoint->getAlignment(),
                                      "interleaved.wide.load");
  auto MSSAU = MemorySSAUpdater(&MSSA);
  MemoryUse *MSSALoad = cast<MemoryUse>(MSSAU.createMemoryAccessBefore(
      LI, nullptr, MSSA.getMemoryAccess(InsertionPoint)));
  MSSAU.insertUse(MSSALoad);

  // Create the final SVIs and replace all uses.
  int i = 0;
  for (auto &VI : InterleavedLoad) {
    // Shuffle i extracts every Factor-th element of the wide load, starting
    // at offset i -- i.e. the de-interleaved lane i.
    SmallVector<uint32_t, 4> Mask;
    for (unsigned j = 0; j < ElementsPerSVI; j++)
      Mask.push_back(i + j * Factor);

    Builder.SetInsertPoint(VI.SVI);
    auto SVI = Builder.CreateShuffleVector(LI, UndefValue::get(LI->getType()),
                                           Mask, "interleaved.shuffle");
    VI.SVI->replaceAllUsesWith(SVI);
    i++;
  }

  NumInterleavedLoadCombine++;
  ORE.emit([&]() {
    return OptimizationRemark(DEBUG_TYPE"interleaved-load-combine", "Combined Interleaved Load", LI)
           << "Load interleaved combined with factor "
           << ore::NV("Factor", Factor);
  });

  return true;
}
1251 | ||||
1252 | bool InterleavedLoadCombineImpl::run() { | |||
1253 | OptimizationRemarkEmitter ORE(&F); | |||
1254 | bool changed = false; | |||
1255 | unsigned MaxFactor = TLI.getMaxSupportedInterleaveFactor(); | |||
1256 | ||||
1257 | auto &DL = F.getParent()->getDataLayout(); | |||
1258 | ||||
1259 | // Start with the highest factor to avoid combining and recombining. | |||
1260 | for (unsigned Factor = MaxFactor; Factor >= 2; Factor--) { | |||
1261 | std::list<VectorInfo> Candidates; | |||
1262 | ||||
1263 | for (BasicBlock &BB : F) { | |||
1264 | for (Instruction &I : BB) { | |||
1265 | if (auto SVI = dyn_cast<ShuffleVectorInst>(&I)) { | |||
1266 | ||||
1267 | Candidates.emplace_back(SVI->getType()); | |||
1268 | ||||
1269 | if (!VectorInfo::computeFromSVI(SVI, Candidates.back(), DL)) { | |||
1270 | Candidates.pop_back(); | |||
1271 | continue; | |||
1272 | } | |||
1273 | ||||
1274 | if (!Candidates.back().isInterleaved(Factor, DL)) { | |||
1275 | Candidates.pop_back(); | |||
1276 | } | |||
1277 | } | |||
1278 | } | |||
1279 | } | |||
1280 | ||||
1281 | std::list<VectorInfo> InterleavedLoad; | |||
1282 | while (findPattern(Candidates, InterleavedLoad, Factor, DL)) { | |||
1283 | if (combine(InterleavedLoad, ORE)) { | |||
1284 | changed = true; | |||
1285 | } else { | |||
1286 | // Remove the first element of the Interleaved Load but put the others | |||
1287 | // back on the list and continue searching | |||
1288 | Candidates.splice(Candidates.begin(), InterleavedLoad, | |||
1289 | std::next(InterleavedLoad.begin()), | |||
1290 | InterleavedLoad.end()); | |||
1291 | } | |||
1292 | InterleavedLoad.clear(); | |||
1293 | } | |||
1294 | } | |||
1295 | ||||
1296 | return changed; | |||
1297 | } | |||
1298 | ||||
1299 | namespace { | |||
/// This pass combines interleaved loads into a pattern detectable by
/// InterleavedAccessPass.
struct InterleavedLoadCombine : public FunctionPass {
  static char ID; // Pass identification.

  InterleavedLoadCombine() : FunctionPass(ID) {
    initializeInterleavedLoadCombinePass(*PassRegistry::getPassRegistry());
  }

  StringRef getPassName() const override {
    return "Interleaved Load Combine Pass";
  }

  /// Run InterleavedLoadCombineImpl on \p F; returns true if the IR changed.
  /// Does nothing when disabled on the command line or when no
  /// TargetPassConfig (and therefore no TargetMachine) is available.
  bool runOnFunction(Function &F) override {
    if (DisableInterleavedLoadCombine)
      return false;

    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      return false;

    LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("interleaved-load-combine")) { dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n" ; } } while (false)
               << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("interleaved-load-combine")) { dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n" ; } } while (false);

    return InterleavedLoadCombineImpl(
               F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
               getAnalysis<MemorySSAWrapperPass>().getMSSA(),
               TPC->getTM<TargetMachine>())
        .run();
  }

  /// The implementation transforms IR and needs dominator tree and MemorySSA
  /// analyses.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MemorySSAWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
    FunctionPass::getAnalysisUsage(AU);
  }

private:
};
1339 | } // anonymous namespace | |||
1340 | ||||
char InterleavedLoadCombine::ID = 0;

// Legacy pass registration. The INITIALIZE_PASS_* macros below (shown fused
// with their preprocessor expansion) define
// initializeInterleavedLoadCombinePassOnce(), which registers the pass and
// its DominatorTree/MemorySSA dependencies exactly once via llvm::call_once.
INITIALIZE_PASS_BEGIN(static void *initializeInterleavedLoadCombinePassOnce(PassRegistry &Registry) {
    InterleavedLoadCombine, DEBUG_TYPE,static void *initializeInterleavedLoadCombinePassOnce(PassRegistry &Registry) {
    "Combine interleaved loads into wide loads and shufflevector instructions",static void *initializeInterleavedLoadCombinePassOnce(PassRegistry &Registry) {
    false, false)static void *initializeInterleavedLoadCombinePassOnce(PassRegistry &Registry) {
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)initializeMemorySSAWrapperPassPass(Registry);
INITIALIZE_PASS_END(PassInfo *PI = new PassInfo( "Combine interleaved loads into wide loads and shufflevector instructions" , "interleaved-load-combine", &InterleavedLoadCombine::ID , PassInfo::NormalCtor_t(callDefaultCtor<InterleavedLoadCombine >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeInterleavedLoadCombinePassFlag ; void llvm::initializeInterleavedLoadCombinePass(PassRegistry &Registry) { llvm::call_once(InitializeInterleavedLoadCombinePassFlag , initializeInterleavedLoadCombinePassOnce, std::ref(Registry )); }
    InterleavedLoadCombine, DEBUG_TYPE,PassInfo *PI = new PassInfo( "Combine interleaved loads into wide loads and shufflevector instructions" , "interleaved-load-combine", &InterleavedLoadCombine::ID , PassInfo::NormalCtor_t(callDefaultCtor<InterleavedLoadCombine >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeInterleavedLoadCombinePassFlag ; void llvm::initializeInterleavedLoadCombinePass(PassRegistry &Registry) { llvm::call_once(InitializeInterleavedLoadCombinePassFlag , initializeInterleavedLoadCombinePassOnce, std::ref(Registry )); }
    "Combine interleaved loads into wide loads and shufflevector instructions",PassInfo *PI = new PassInfo( "Combine interleaved loads into wide loads and shufflevector instructions" , "interleaved-load-combine", &InterleavedLoadCombine::ID , PassInfo::NormalCtor_t(callDefaultCtor<InterleavedLoadCombine >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeInterleavedLoadCombinePassFlag ; void llvm::initializeInterleavedLoadCombinePass(PassRegistry &Registry) { llvm::call_once(InitializeInterleavedLoadCombinePassFlag , initializeInterleavedLoadCombinePassOnce, std::ref(Registry )); }
    false, false)PassInfo *PI = new PassInfo( "Combine interleaved loads into wide loads and shufflevector instructions" , "interleaved-load-combine", &InterleavedLoadCombine::ID , PassInfo::NormalCtor_t(callDefaultCtor<InterleavedLoadCombine >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeInterleavedLoadCombinePassFlag ; void llvm::initializeInterleavedLoadCombinePass(PassRegistry &Registry) { llvm::call_once(InitializeInterleavedLoadCombinePassFlag , initializeInterleavedLoadCombinePassOnce, std::ref(Registry )); }
1353 | ||||
1354 | FunctionPass * | |||
1355 | llvm::createInterleavedLoadCombinePass() { | |||
1356 | auto P = new InterleavedLoadCombine(); | |||
1357 | return P; | |||
1358 | } |
1 | //===- llvm/Support/Casting.h - Allow flexible, checked, casts --*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the isa<X>(), cast<X>(), dyn_cast<X>(), cast_or_null<X>(), |
10 | // and dyn_cast_or_null<X>() templates. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_SUPPORT_CASTING_H |
15 | #define LLVM_SUPPORT_CASTING_H |
16 | |
17 | #include "llvm/Support/Compiler.h" |
18 | #include "llvm/Support/type_traits.h" |
19 | #include <cassert> |
20 | #include <memory> |
21 | #include <type_traits> |
22 | |
23 | namespace llvm { |
24 | |
25 | //===----------------------------------------------------------------------===// |
26 | // isa<x> Support Templates |
27 | //===----------------------------------------------------------------------===// |
28 | |
// Define a template that can be specialized by smart pointers to reflect the
// fact that they are automatically dereferenced, and are not involved with the
// template selection process...  the default implementation is a noop.
//
// Primary template: a type "simplifies" to itself.
template<typename From> struct simplify_type {
  using SimpleType = From; // The real type this represents...

  // An accessor to get the real value...
  static SimpleType &getSimplifiedValue(From &Val) { return Val; }
};
39 | |
// const-qualified specialization: simplify the underlying non-const type,
// then re-apply constness to the result (placing it past a pointer, if any).
template<typename From> struct simplify_type<const From> {
  using NonConstSimpleType = typename simplify_type<From>::SimpleType;
  using SimpleType =
      typename add_const_past_pointer<NonConstSimpleType>::type;
  using RetType =
      typename add_lvalue_reference_if_not_pointer<SimpleType>::type;

  static RetType getSimplifiedValue(const From& Val) {
    // Safe: the non-const overload does not mutate Val.
    return simplify_type<From>::getSimplifiedValue(const_cast<From&>(Val));
  }
};
51 | |
// The core of the implementation of isa<X> is here; To and From should be
// the names of classes.  This template can be specialized to customize the
// implementation of isa<> without rewriting it from scratch.
template <typename To, typename From, typename Enabler = void>
struct isa_impl {
  // Default behavior: defer to the target class's classof() predicate.
  static inline bool doit(const From &Val) {
    return To::classof(&Val);
  }
};
61 | |
/// Always allow upcasts, and perform no dynamic check for them.
// Selected via SFINAE whenever To is a base class of From.
template <typename To, typename From>
struct isa_impl<
    To, From, typename std::enable_if<std::is_base_of<To, From>::value>::type> {
  static inline bool doit(const From &) { return true; }
};
68 | |
// isa_impl_cl strips pointer/const qualifiers from From before dispatching to
// isa_impl. Primary template: plain reference.
template <typename To, typename From> struct isa_impl_cl {
  static inline bool doit(const From &Val) {
    return isa_impl<To, From>::doit(Val);
  }
};
74 | |
// Specialization for const references.
template <typename To, typename From> struct isa_impl_cl<To, const From> {
  static inline bool doit(const From &Val) {
    return isa_impl<To, From>::doit(Val);
  }
};
80 | |
// Specialization for std::unique_ptr: dereferences the smart pointer, which
// must not be null.
template <typename To, typename From>
struct isa_impl_cl<To, const std::unique_ptr<From>> {
  static inline bool doit(const std::unique_ptr<From> &Val) {
    assert(Val && "isa<> used on a null pointer")((Val && "isa<> used on a null pointer") ? static_cast <void> (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-9~svn359426/include/llvm/Support/Casting.h" , 84, __PRETTY_FUNCTION__));
    return isa_impl_cl<To, From>::doit(*Val);
  }
};
88 | |
// Specialization for raw pointers, which must not be null.
template <typename To, typename From> struct isa_impl_cl<To, From*> {
  static inline bool doit(const From *Val) {
    assert(Val && "isa<> used on a null pointer")((Val && "isa<> used on a null pointer") ? static_cast <void> (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-9~svn359426/include/llvm/Support/Casting.h" , 91, __PRETTY_FUNCTION__));
    return isa_impl<To, From>::doit(*Val);
  }
};
95 | |
// Specialization for const-qualified raw pointers (T *const).
template <typename To, typename From> struct isa_impl_cl<To, From*const> {
  static inline bool doit(const From *Val) {
    assert(Val && "isa<> used on a null pointer")((Val && "isa<> used on a null pointer") ? static_cast <void> (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-9~svn359426/include/llvm/Support/Casting.h" , 98, __PRETTY_FUNCTION__));
    return isa_impl<To, From>::doit(*Val);
  }
};
102 | |
// Specialization for pointer-to-const (const T *).
template <typename To, typename From> struct isa_impl_cl<To, const From*> {
  static inline bool doit(const From *Val) {
    assert(Val && "isa<> used on a null pointer")((Val && "isa<> used on a null pointer") ? static_cast <void> (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-9~svn359426/include/llvm/Support/Casting.h" , 105, __PRETTY_FUNCTION__));
    return isa_impl<To, From>::doit(*Val);
  }
};
109 | |
110 | template <typename To, typename From> struct isa_impl_cl<To, const From*const> { |
111 | static inline bool doit(const From *Val) { |
112 | assert(Val && "isa<> used on a null pointer")((Val && "isa<> used on a null pointer") ? static_cast <void> (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-9~svn359426/include/llvm/Support/Casting.h" , 112, __PRETTY_FUNCTION__)); |
113 | return isa_impl<To, From>::doit(*Val); |
114 | } |
115 | }; |
116 | |
// isa_impl_wrap - Recursively apply simplify_type to the source value until
// From is its own SimpleType, then hand the fully simplified value off to
// isa_impl_cl.  The recursion terminates at the <To, FromTy, FromTy> partial
// specialization below.
template<typename To, typename From, typename SimpleFrom>
struct isa_impl_wrap {
  // When From != SimplifiedType, we can simplify the type some more by using
  // the simplify_type template.
  static bool doit(const From &Val) {
    return isa_impl_wrap<To, SimpleFrom,
      typename simplify_type<SimpleFrom>::SimpleType>::doit(
                          simplify_type<const From>::getSimplifiedValue(Val));
  }
};

// Base case of the recursion: From == SimpleType, nothing left to simplify.
template<typename To, typename FromTy>
struct isa_impl_wrap<To, FromTy, FromTy> {
  // When From == SimpleType, we are as simple as we are going to get.
  static bool doit(const FromTy &Val) {
    return isa_impl_cl<To,FromTy>::doit(Val);
  }
};
135 | |
// isa<X> - Return true if the parameter to the template is an instance of the
// template type argument. Used like this:
//
//  if (isa<Type>(myVal)) { ... }
//
// Note: the pointer specializations of isa_impl_cl assert that Val is
// non-null; pass only non-null values here (see isa_and_nonnull below for
// possibly-null input).
template <class X, class Y> LLVM_NODISCARD inline bool isa(const Y &Val) {
  return isa_impl_wrap<X, const Y,
                       typename simplify_type<const Y>::SimpleType>::doit(Val);
}
145 | |
146 | // isa_and_nonnull<X> - Functionally identical to isa, except that a null value |
147 | // is accepted. |
148 | // |
149 | template <class X, class Y> |
150 | LLVM_NODISCARD[[clang::warn_unused_result]] inline bool isa_and_nonnull(const Y &Val) { |
151 | if (!Val) |
152 | return false; |
153 | return isa<X>(Val); |
154 | } |
155 | |
//===----------------------------------------------------------------------===//
//                          cast<x> Support Templates
//===----------------------------------------------------------------------===//

template<class To, class From> struct cast_retty;

// Calculate what type the 'cast' function should return, based on a requested
// type of To and a source type of From: pointer-ness and const-ness of the
// source are mirrored onto the result type.
template<class To, class From> struct cast_retty_impl {
  using ret_type = To &;       // Normal case, return Ty&
};
template<class To, class From> struct cast_retty_impl<To, const From> {
  using ret_type = const To &; // Const value case, return const Ty&
};

template<class To, class From> struct cast_retty_impl<To, From*> {
  using ret_type = To *;       // Pointer arg case, return Ty*
};

template<class To, class From> struct cast_retty_impl<To, const From*> {
  using ret_type = const To *; // Constant pointer arg case, return const Ty*
};

template<class To, class From> struct cast_retty_impl<To, const From*const> {
  using ret_type = const To *; // Constant pointer arg case, return const Ty*
};

// std::unique_ptr<From> casts to std::unique_ptr<To-flavored pointee>,
// i.e. ownership transfers through the cast.
template <class To, class From>
struct cast_retty_impl<To, std::unique_ptr<From>> {
private:
  // Reuse the raw-pointer rule to get the (possibly const) pointee type.
  using PointerType = typename cast_retty_impl<To, From *>::ret_type;
  using ResultType = typename std::remove_pointer<PointerType>::type;

public:
  using ret_type = std::unique_ptr<ResultType>;
};
192 | |
// cast_retty_wrap - Compute the cast return type after first reducing From
// through simplify_type, then applying cast_retty_impl to the reduced type.
template<class To, class From, class SimpleFrom>
struct cast_retty_wrap {
  // When the simplified type and the from type are not the same, use the type
  // simplifier to reduce the type, then reuse cast_retty_impl to get the
  // resultant type.
  using ret_type = typename cast_retty<To, SimpleFrom>::ret_type;
};

template<class To, class FromTy>
struct cast_retty_wrap<To, FromTy, FromTy> {
  // When the simplified type is equal to the from type, use it directly.
  using ret_type = typename cast_retty_impl<To,FromTy>::ret_type;
};

// cast_retty - Public entry point: the type that cast<To>(From) returns.
template<class To, class From>
struct cast_retty {
  using ret_type = typename cast_retty_wrap<
      To, From, typename simplify_type<From>::SimpleType>::ret_type;
};
212 | |
// Ensure the non-simple values are converted using the simplify_type template
// that may be specialized by smart pointers...
//
template<class To, class From, class SimpleFrom> struct cast_convert_val {
  // This is not a simple type, use the template to simplify it...
  static typename cast_retty<To, From>::ret_type doit(From &Val) {
    return cast_convert_val<To, SimpleFrom,
      typename simplify_type<SimpleFrom>::SimpleType>::doit(
                          simplify_type<From>::getSimplifiedValue(Val));
  }
};

template<class To, class FromTy> struct cast_convert_val<To,FromTy,FromTy> {
  // This _is_ a simple type, just cast it.
  // The C-style cast (combined with const_cast) is deliberate here: it
  // performs whichever conversion the ret_type relationship requires.
  static typename cast_retty<To, FromTy>::ret_type doit(const FromTy &Val) {
    typename cast_retty<To, FromTy>::ret_type Res2
     = (typename cast_retty<To, FromTy>::ret_type)const_cast<FromTy&>(Val);
    return Res2;
  }
};

// is_simple_type<X> - True when simplify_type<X> is the identity, i.e. X
// needs no further reduction before the isa/cast machinery can act on it.
template <class X> struct is_simple_type {
  static const bool value =
      std::is_same<X, typename simplify_type<X>::SimpleType>::value;
};
238 | |
// cast<X> - Return the argument parameter cast to the specified type.  This
// casting operator asserts that the type is correct, so it does not return null
// on failure.  It does not allow a null argument (use cast_or_null for that).
// It is typically used like this:
//
//  cast<Instruction>(myVal)->getParent()
//

// Overload for non-simple const values: enable_if keeps this out of overload
// resolution when Y is already its own SimpleType (the Y& overload handles
// that case).
template <class X, class Y>
inline typename std::enable_if<!is_simple_type<Y>::value,
                               typename cast_retty<X, const Y>::ret_type>::type
cast(const Y &Val) {
  assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
  return cast_convert_val<
      X, const Y, typename simplify_type<const Y>::SimpleType>::doit(Val);
}

// Overload for mutable references.
template <class X, class Y>
inline typename cast_retty<X, Y>::ret_type cast(Y &Val) {
  assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
  return cast_convert_val<X, Y,
                          typename simplify_type<Y>::SimpleType>::doit(Val);
}

// Overload for raw pointers (the most common form).
template <class X, class Y>
inline typename cast_retty<X, Y *>::ret_type cast(Y *Val) {
  assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
  return cast_convert_val<X, Y*,
                          typename simplify_type<Y*>::SimpleType>::doit(Val);
}

// Overload for unique_ptr rvalues: ownership is released from Val and
// transferred into the returned unique_ptr of the target type.
template <class X, class Y>
inline typename cast_retty<X, std::unique_ptr<Y>>::ret_type
cast(std::unique_ptr<Y> &&Val) {
  assert(isa<X>(Val.get()) && "cast<Ty>() argument of incompatible type!");
  using ret_type = typename cast_retty<X, std::unique_ptr<Y>>::ret_type;
  return ret_type(
      cast_convert_val<X, Y *, typename simplify_type<Y *>::SimpleType>::doit(
          Val.release()));
}
278 | |
// cast_or_null<X> - Functionally identical to cast, except that a null value
// is accepted and propagated as a null result instead of asserting.
//

// Overload for non-simple const values.
template <class X, class Y>
LLVM_NODISCARD inline
    typename std::enable_if<!is_simple_type<Y>::value,
                            typename cast_retty<X, const Y>::ret_type>::type
    cast_or_null(const Y &Val) {
  if (!Val)
    return nullptr;
  assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
  return cast<X>(Val);
}

// Overload for non-simple mutable values.
template <class X, class Y>
LLVM_NODISCARD inline
    typename std::enable_if<!is_simple_type<Y>::value,
                            typename cast_retty<X, Y>::ret_type>::type
    cast_or_null(Y &Val) {
  if (!Val)
    return nullptr;
  assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
  return cast<X>(Val);
}

// Overload for raw pointers.
template <class X, class Y>
LLVM_NODISCARD inline typename cast_retty<X, Y *>::ret_type
cast_or_null(Y *Val) {
  if (!Val) return nullptr;
  assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
  return cast<X>(Val);
}

// Overload for unique_ptr rvalues; null stays null, otherwise ownership is
// transferred through cast<X>.
template <class X, class Y>
inline typename cast_retty<X, std::unique_ptr<Y>>::ret_type
cast_or_null(std::unique_ptr<Y> &&Val) {
  if (!Val)
    return nullptr;
  return cast<X>(std::move(Val));
}
319 | |
// dyn_cast<X> - Return the argument parameter cast to the specified type.  This
// casting operator returns null if the argument is of the wrong type, so it can
// be used to test for a type as well as cast if successful.  This should be
// used in the context of an if statement like this:
//
//  if (const Instruction *I = dyn_cast<Instruction>(myVal)) { ... }
//
// Note: like isa<>, dyn_cast does NOT accept a null argument — use
// dyn_cast_or_null for possibly-null values.

// Overload for non-simple const values (enable_if defers simple types to the
// Y& overload below).
template <class X, class Y>
LLVM_NODISCARD inline
    typename std::enable_if<!is_simple_type<Y>::value,
                            typename cast_retty<X, const Y>::ret_type>::type
    dyn_cast(const Y &Val) {
  return isa<X>(Val) ? cast<X>(Val) : nullptr;
}
335 | |
336 | template <class X, class Y> |
337 | LLVM_NODISCARD[[clang::warn_unused_result]] inline typename cast_retty<X, Y>::ret_type dyn_cast(Y &Val) { |
338 | return isa<X>(Val) ? cast<X>(Val) : nullptr; |
339 | } |
340 | |
341 | template <class X, class Y> |
342 | LLVM_NODISCARD[[clang::warn_unused_result]] inline typename cast_retty<X, Y *>::ret_type dyn_cast(Y *Val) { |
343 | return isa<X>(Val) ? cast<X>(Val) : nullptr; |
344 | } |
345 | |
// dyn_cast_or_null<X> - Functionally identical to dyn_cast, except that a null
// value is accepted (and yields a null result).
//

// Overload for non-simple const values.
template <class X, class Y>
LLVM_NODISCARD inline
    typename std::enable_if<!is_simple_type<Y>::value,
                            typename cast_retty<X, const Y>::ret_type>::type
    dyn_cast_or_null(const Y &Val) {
  return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr;
}

// Overload for non-simple mutable values.
template <class X, class Y>
LLVM_NODISCARD inline
    typename std::enable_if<!is_simple_type<Y>::value,
                            typename cast_retty<X, Y>::ret_type>::type
    dyn_cast_or_null(Y &Val) {
  return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr;
}
364 | |
365 | template <class X, class Y> |
366 | LLVM_NODISCARD[[clang::warn_unused_result]] inline typename cast_retty<X, Y *>::ret_type |
367 | dyn_cast_or_null(Y *Val) { |
368 | return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr; |
369 | } |
370 | |
// unique_dyn_cast<X> - Given a unique_ptr<Y>, try to return a unique_ptr<X>,
// taking ownership of the input pointer iff isa<X>(Val) is true.  If the
// cast is successful, From refers to nullptr on exit and the casted value
// is returned.  If the cast is unsuccessful, the function returns nullptr
// and From is unchanged.
template <class X, class Y>
LLVM_NODISCARD inline auto unique_dyn_cast(std::unique_ptr<Y> &Val)
    -> decltype(cast<X>(Val)) {
  if (!isa<X>(Val))
    return nullptr;
  return cast<X>(std::move(Val));
}

// Rvalue overload: binds the temporary to an lvalue and forwards to the
// overload above; ownership transfers on success.
template <class X, class Y>
LLVM_NODISCARD inline auto unique_dyn_cast(std::unique_ptr<Y> &&Val)
    -> decltype(cast<X>(Val)) {
  return unique_dyn_cast<X, Y>(Val);
}

// unique_dyn_cast_or_null<X> - Functionally identical to unique_dyn_cast,
// except that a null value is accepted.
template <class X, class Y>
LLVM_NODISCARD inline auto unique_dyn_cast_or_null(std::unique_ptr<Y> &Val)
    -> decltype(cast<X>(Val)) {
  if (!Val)
    return nullptr;
  return unique_dyn_cast<X, Y>(Val);
}

// Rvalue overload of unique_dyn_cast_or_null.
template <class X, class Y>
LLVM_NODISCARD inline auto unique_dyn_cast_or_null(std::unique_ptr<Y> &&Val)
    -> decltype(cast<X>(Val)) {
  return unique_dyn_cast_or_null<X, Y>(Val);
}
405 | |
406 | } // end namespace llvm |
407 | |
408 | #endif // LLVM_SUPPORT_CASTING_H |