Analysis.cpp (LLVM 17.0.0git)
//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines several CodeGen-specific LLVM IR analysis utilities.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Analysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

/// Compute the linearized index of a member in a nested aggregate/struct/array
/// by recursing and accumulating CurIndex as long as there are indices in the
/// index list.
unsigned llvm::ComputeLinearIndex(Type *Ty,
                                  const unsigned *Indices,
                                  const unsigned *IndicesEnd,
                                  unsigned CurIndex) {
  // Base case: We're done.
  if (Indices && Indices == IndicesEnd)
    return CurIndex;

  // Given a struct type, recursively traverse the elements.
  if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto I : llvm::enumerate(STy->elements())) {
      Type *ET = I.value();
      if (Indices && *Indices == I.index())
        return ComputeLinearIndex(ET, Indices + 1, IndicesEnd, CurIndex);
      CurIndex = ComputeLinearIndex(ET, nullptr, nullptr, CurIndex);
    }
    assert(!Indices && "Unexpected out-of-bounds index");
    return CurIndex;
  }
  // Given an array type, recursively traverse the elements.
  else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Type *EltTy = ATy->getElementType();
    unsigned NumElts = ATy->getNumElements();
    // Compute the linear offset added when jumping over one array element.
    unsigned EltLinearOffset = ComputeLinearIndex(EltTy, nullptr, nullptr, 0);
    if (Indices) {
      assert(*Indices < NumElts && "Unexpected out-of-bounds index");
      // If the index is inside the array, compute the index of the requested
      // element and recurse into it with the rest of the index list.
      CurIndex += EltLinearOffset * *Indices;
      return ComputeLinearIndex(EltTy, Indices + 1, IndicesEnd, CurIndex);
    }
    CurIndex += EltLinearOffset * NumElts;
    return CurIndex;
  }
  // We haven't found the type we're looking for, so keep searching.
  return CurIndex + 1;
}
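
// Example (illustrative sketch, not part of the original file): the leaves of
// { i32, [2 x i64], float } linearize to [i32, i64, i64, float], so the index
// path {2} (the float member) maps to linear index 3:
//
//   LLVMContext Ctx;
//   Type *I32 = Type::getInt32Ty(Ctx), *I64 = Type::getInt64Ty(Ctx);
//   StructType *STy = StructType::get(
//       Ctx, {I32, ArrayType::get(I64, 2), Type::getFloatTy(Ctx)});
//   unsigned Idx[] = {2};
//   unsigned Linear = ComputeLinearIndex(STy, Idx, Idx + 1, 0); // Linear == 3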

/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
/// EVTs that represent all the individual underlying
/// non-aggregate types that comprise it.
///
/// If Offsets is non-null, it points to a vector to be filled in
/// with the in-memory offsets of each of the individual values.
///
void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                           Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
                           SmallVectorImpl<EVT> *MemVTs,
                           SmallVectorImpl<uint64_t> *Offsets,
                           uint64_t StartingOffset) {
  // Given a struct type, recursively traverse the elements.
  if (StructType *STy = dyn_cast<StructType>(Ty)) {
    // If the Offsets aren't needed, don't query the struct layout. This allows
    // us to support structs with scalable vectors for operations that don't
    // need offsets.
    const StructLayout *SL = Offsets ? DL.getStructLayout(STy) : nullptr;
    for (StructType::element_iterator EB = STy->element_begin(),
                                      EI = EB,
                                      EE = STy->element_end();
         EI != EE; ++EI) {
      // Don't compute the element offset if we didn't get a StructLayout above.
      uint64_t EltOffset = SL ? SL->getElementOffset(EI - EB) : 0;
      ComputeValueVTs(TLI, DL, *EI, ValueVTs, MemVTs, Offsets,
                      StartingOffset + EltOffset);
    }
    return;
  }
  // Given an array type, recursively traverse the elements.
  if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Type *EltTy = ATy->getElementType();
    uint64_t EltSize = DL.getTypeAllocSize(EltTy).getFixedValue();
    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
      ComputeValueVTs(TLI, DL, EltTy, ValueVTs, MemVTs, Offsets,
                      StartingOffset + i * EltSize);
    return;
  }
  // Interpret void as zero return values.
  if (Ty->isVoidTy())
    return;
  // Base case: we can get an EVT for this LLVM IR type.
  ValueVTs.push_back(TLI.getValueType(DL, Ty));
  if (MemVTs)
    MemVTs->push_back(TLI.getMemValueType(DL, Ty));
  if (Offsets)
    Offsets->push_back(StartingOffset);
}
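
// Example (hedged sketch, assuming a TargetLowering TLI and DataLayout DL are
// in scope): for { i32, [2 x float] } this appends three value types and, if
// requested, their in-memory byte offsets (0, 4 and 8 under a typical layout):
//
//   SmallVector<EVT, 4> VTs;
//   SmallVector<uint64_t, 4> Offs;
//   ComputeValueVTs(TLI, DL, STy, VTs, &Offs); // VTs = {i32, f32, f32}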

void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                           Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
                           SmallVectorImpl<uint64_t> *Offsets,
                           uint64_t StartingOffset) {
  return ComputeValueVTs(TLI, DL, Ty, ValueVTs, /*MemVTs=*/nullptr, Offsets,
                         StartingOffset);
}

void llvm::computeValueLLTs(const DataLayout &DL, Type &Ty,
                            SmallVectorImpl<LLT> &ValueTys,
                            SmallVectorImpl<uint64_t> *Offsets,
                            uint64_t StartingOffset) {
  // Given a struct type, recursively traverse the elements.
  if (StructType *STy = dyn_cast<StructType>(&Ty)) {
    // If the Offsets aren't needed, don't query the struct layout. This allows
    // us to support structs with scalable vectors for operations that don't
    // need offsets.
    const StructLayout *SL = Offsets ? DL.getStructLayout(STy) : nullptr;
    for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I) {
      uint64_t EltOffset = SL ? SL->getElementOffset(I) : 0;
      computeValueLLTs(DL, *STy->getElementType(I), ValueTys, Offsets,
                       StartingOffset + EltOffset);
    }
    return;
  }
  // Given an array type, recursively traverse the elements.
  if (ArrayType *ATy = dyn_cast<ArrayType>(&Ty)) {
    Type *EltTy = ATy->getElementType();
    uint64_t EltSize = DL.getTypeAllocSize(EltTy).getFixedValue();
    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
      computeValueLLTs(DL, *EltTy, ValueTys, Offsets,
                       StartingOffset + i * EltSize);
    return;
  }
  // Interpret void as zero return values.
  if (Ty.isVoidTy())
    return;
  // Base case: we can get an LLT for this LLVM IR type.
  ValueTys.push_back(getLLTForType(Ty, DL));
  if (Offsets != nullptr)
    Offsets->push_back(StartingOffset * 8);
}
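
// Example (illustrative): note that, unlike ComputeValueVTs above, the offsets
// recorded here are in bits (StartingOffset * 8). For { i32, float } under a
// typical layout:
//
//   SmallVector<LLT, 4> Tys;
//   SmallVector<uint64_t, 4> Offs;
//   computeValueLLTs(DL, *STy, Tys, &Offs); // Tys = {s32, s32}, Offs = {0, 32}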

/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
GlobalValue *llvm::ExtractTypeInfo(Value *V) {
  V = V->stripPointerCasts();
  GlobalValue *GV = dyn_cast<GlobalValue>(V);
  GlobalVariable *Var = dyn_cast<GlobalVariable>(V);

  if (Var && Var->getName() == "llvm.eh.catch.all.value") {
    assert(Var->hasInitializer() &&
           "The EH catch-all value must have an initializer");
    Value *Init = Var->getInitializer();
    GV = dyn_cast<GlobalValue>(Init);
    if (!GV) V = cast<ConstantPointerNull>(Init);
  }

  assert((GV || isa<ConstantPointerNull>(V)) &&
         "TypeInfo must be a global variable or NULL");
  return GV;
}

/// getFCmpCondCode - Return the ISD condition code corresponding to
/// the given LLVM IR floating-point condition code. This includes
/// consideration of global floating-point math flags.
///
ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) {
  switch (Pred) {
  case FCmpInst::FCMP_FALSE: return ISD::SETFALSE;
  case FCmpInst::FCMP_OEQ:   return ISD::SETOEQ;
  case FCmpInst::FCMP_OGT:   return ISD::SETOGT;
  case FCmpInst::FCMP_OGE:   return ISD::SETOGE;
  case FCmpInst::FCMP_OLT:   return ISD::SETOLT;
  case FCmpInst::FCMP_OLE:   return ISD::SETOLE;
  case FCmpInst::FCMP_ONE:   return ISD::SETONE;
  case FCmpInst::FCMP_ORD:   return ISD::SETO;
  case FCmpInst::FCMP_UNO:   return ISD::SETUO;
  case FCmpInst::FCMP_UEQ:   return ISD::SETUEQ;
  case FCmpInst::FCMP_UGT:   return ISD::SETUGT;
  case FCmpInst::FCMP_UGE:   return ISD::SETUGE;
  case FCmpInst::FCMP_ULT:   return ISD::SETULT;
  case FCmpInst::FCMP_ULE:   return ISD::SETULE;
  case FCmpInst::FCMP_UNE:   return ISD::SETUNE;
  case FCmpInst::FCMP_TRUE:  return ISD::SETTRUE;
  default: llvm_unreachable("Invalid FCmp predicate opcode!");
  }
}

ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) {
  switch (CC) {
  case ISD::SETOEQ: case ISD::SETUEQ: return ISD::SETEQ;
  case ISD::SETONE: case ISD::SETUNE: return ISD::SETNE;
  case ISD::SETOLT: case ISD::SETULT: return ISD::SETLT;
  case ISD::SETOLE: case ISD::SETULE: return ISD::SETLE;
  case ISD::SETOGT: case ISD::SETUGT: return ISD::SETGT;
  case ISD::SETOGE: case ISD::SETUGE: return ISD::SETGE;
  default: return CC;
  }
}
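
// Example (illustrative): when NaNs can be ignored, e.g. under the nnan
// fast-math flag, an ordered comparison collapses to its integer-style code:
//
//   ISD::CondCode CC = getFCmpCondCode(FCmpInst::FCMP_OLT); // ISD::SETOLT
//   CC = getFCmpCodeWithoutNaN(CC);                         // ISD::SETLT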

ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
  switch (Pred) {
  case ICmpInst::ICMP_EQ:  return ISD::SETEQ;
  case ICmpInst::ICMP_NE:  return ISD::SETNE;
  case ICmpInst::ICMP_SLE: return ISD::SETLE;
  case ICmpInst::ICMP_ULE: return ISD::SETULE;
  case ICmpInst::ICMP_SGE: return ISD::SETGE;
  case ICmpInst::ICMP_UGE: return ISD::SETUGE;
  case ICmpInst::ICMP_SLT: return ISD::SETLT;
  case ICmpInst::ICMP_ULT: return ISD::SETULT;
  case ICmpInst::ICMP_SGT: return ISD::SETGT;
  case ICmpInst::ICMP_UGT: return ISD::SETUGT;
  default:
    llvm_unreachable("Invalid ICmp predicate opcode!");
  }
}

ICmpInst::Predicate llvm::getICmpCondCode(ISD::CondCode Pred) {
  switch (Pred) {
  case ISD::SETEQ:
    return ICmpInst::ICMP_EQ;
  case ISD::SETNE:
    return ICmpInst::ICMP_NE;
  case ISD::SETLE:
    return ICmpInst::ICMP_SLE;
  case ISD::SETULE:
    return ICmpInst::ICMP_ULE;
  case ISD::SETGE:
    return ICmpInst::ICMP_SGE;
  case ISD::SETUGE:
    return ICmpInst::ICMP_UGE;
  case ISD::SETLT:
    return ICmpInst::ICMP_SLT;
  case ISD::SETULT:
    return ICmpInst::ICMP_ULT;
  case ISD::SETGT:
    return ICmpInst::ICMP_SGT;
  case ISD::SETUGT:
    return ICmpInst::ICMP_UGT;
  default:
    llvm_unreachable("Invalid ISD integer condition code!");
  }
}
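
// Example (illustrative): the two mappings above are inverses on the
// predicates they share:
//
//   assert(getICmpCondCode(getICmpCondCode(ICmpInst::ICMP_SLT)) ==
//          ICmpInst::ICMP_SLT);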

static bool isNoopBitcast(Type *T1, Type *T2,
                          const TargetLoweringBase &TLI) {
  return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()) ||
         (isa<VectorType>(T1) && isa<VectorType>(T2) &&
          TLI.isTypeLegal(EVT::getEVT(T1)) && TLI.isTypeLegal(EVT::getEVT(T2)));
}

/// Look through operations that will be free to find the earliest source of
/// this value.
///
/// @param ValLoc If V has aggregate type, we will be interested in a particular
/// scalar component. This records its address; the reverse of this list gives a
/// sequence of indices appropriate for an extractvalue to locate the important
/// value. This value is updated during the function and on exit will indicate
/// similar information for the Value returned.
///
/// @param DataBits If this function looks through truncate instructions, this
/// will record the smallest size attained.
static const Value *getNoopInput(const Value *V,
                                 SmallVectorImpl<unsigned> &ValLoc,
                                 unsigned &DataBits,
                                 const TargetLoweringBase &TLI,
                                 const DataLayout &DL) {
  while (true) {
    // Try to look through V; if V is not an instruction, it can't be looked
    // through.
    const Instruction *I = dyn_cast<Instruction>(V);
    if (!I || I->getNumOperands() == 0) return V;
    const Value *NoopInput = nullptr;

    Value *Op = I->getOperand(0);
    if (isa<BitCastInst>(I)) {
      // Look through truly no-op bitcasts.
      if (isNoopBitcast(Op->getType(), I->getType(), TLI))
        NoopInput = Op;
    } else if (isa<GetElementPtrInst>(I)) {
      // Look through getelementptr.
      if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
        NoopInput = Op;
    } else if (isa<IntToPtrInst>(I)) {
      // Look through inttoptr.
      // Make sure this isn't a truncating or extending cast. We could
      // support this eventually, but don't bother for now.
      if (!isa<VectorType>(I->getType()) &&
          DL.getPointerSizeInBits() ==
              cast<IntegerType>(Op->getType())->getBitWidth())
        NoopInput = Op;
    } else if (isa<PtrToIntInst>(I)) {
      // Look through ptrtoint.
      // Make sure this isn't a truncating or extending cast. We could
      // support this eventually, but don't bother for now.
      if (!isa<VectorType>(I->getType()) &&
          DL.getPointerSizeInBits() ==
              cast<IntegerType>(I->getType())->getBitWidth())
        NoopInput = Op;
    } else if (isa<TruncInst>(I) &&
               TLI.allowTruncateForTailCall(Op->getType(), I->getType())) {
      DataBits =
          std::min((uint64_t)DataBits,
                   I->getType()->getPrimitiveSizeInBits().getFixedValue());
      NoopInput = Op;
    } else if (auto *CB = dyn_cast<CallBase>(I)) {
      const Value *ReturnedOp = CB->getReturnedArgOperand();
      if (ReturnedOp && isNoopBitcast(ReturnedOp->getType(), I->getType(), TLI))
        NoopInput = ReturnedOp;
    } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) {
      // Value may come from either the aggregate or the scalar.
      ArrayRef<unsigned> InsertLoc = IVI->getIndices();
      if (ValLoc.size() >= InsertLoc.size() &&
          std::equal(InsertLoc.begin(), InsertLoc.end(), ValLoc.rbegin())) {
        // The type being inserted is a nested sub-type of the aggregate; we
        // have to remove those initial indices to get the location we're
        // interested in for the operand.
        ValLoc.resize(ValLoc.size() - InsertLoc.size());
        NoopInput = IVI->getInsertedValueOperand();
      } else {
        // The struct we're inserting into has the value we're interested in,
        // no change of address.
        NoopInput = Op;
      }
    } else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(V)) {
      // The part we're interested in will inevitably be some sub-section of
      // the previous aggregate. Combine the two paths to obtain the true
      // address of our element.
      ArrayRef<unsigned> ExtractLoc = EVI->getIndices();
      ValLoc.append(ExtractLoc.rbegin(), ExtractLoc.rend());
      NoopInput = Op;
    }
    // Terminate if we couldn't find anything to look through.
    if (!NoopInput)
      return V;

    V = NoopInput;
  }
}
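
// Example (illustrative, hypothetical IR): for a target whose
// allowTruncateForTailCall hook accepts i64 -> i32, given
//
//   %a = tail call i64 @f()
//   %b = trunc i64 %a to i32
//   ret i32 %b
//
// getNoopInput(%b, ...) walks back through the trunc to %a and records
// DataBits = 32, the smallest width seen on the way.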

/// Return true if this scalar return value only has bits discarded on its path
/// from the "tail call" to the "ret". This includes the obvious noop
/// instructions handled by getNoopInput above as well as free truncations (or
/// extensions prior to the call).
static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal,
                                 SmallVectorImpl<unsigned> &RetIndices,
                                 SmallVectorImpl<unsigned> &CallIndices,
                                 bool AllowDifferingSizes,
                                 const TargetLoweringBase &TLI,
                                 const DataLayout &DL) {

  // Trace the sub-value needed by the return value as far back up the graph as
  // possible, in the hope that it will intersect with the value produced by the
  // call. In the simple case with no "returned" attribute, the hope is actually
  // that we end up back at the tail call instruction itself.
  unsigned BitsRequired = UINT_MAX;
  RetVal = getNoopInput(RetVal, RetIndices, BitsRequired, TLI, DL);

  // If this slot in the value returned is undef, it doesn't matter what the
  // call puts there, it'll be fine.
  if (isa<UndefValue>(RetVal))
    return true;

  // Now do a similar search up through the graph to find where the value
  // actually returned by the "tail call" comes from. In the simple case without
  // a "returned" attribute, the search will be blocked immediately and the loop
  // will be a no-op.
  unsigned BitsProvided = UINT_MAX;
  CallVal = getNoopInput(CallVal, CallIndices, BitsProvided, TLI, DL);

  // There's no hope if we can't actually trace them to (the same part of!) the
  // same value.
  if (CallVal != RetVal || CallIndices != RetIndices)
    return false;

  // However, intervening truncates may have made the call non-tail. Make sure
  // all the bits that are needed by the "ret" have been provided by the "tail
  // call". FIXME: with sufficiently cunning bit-tracking, we could look through
  // extensions too.
  if (BitsProvided < BitsRequired ||
      (!AllowDifferingSizes && BitsProvided != BitsRequired))
    return false;

  return true;
}

/// For an aggregate type, determine whether a given index is within bounds or
/// not.
static bool indexReallyValid(Type *T, unsigned Idx) {
  if (ArrayType *AT = dyn_cast<ArrayType>(T))
    return Idx < AT->getNumElements();

  return Idx < cast<StructType>(T)->getNumElements();
}

/// Move the given iterators to the next leaf type in depth first traversal.
///
/// Performs a depth-first traversal of the type as specified by its arguments,
/// stopping at the next leaf node (which may be a legitimate scalar type or an
/// empty struct or array).
///
/// @param SubTypes List of the partial components making up the type from
/// outermost to innermost non-empty aggregate. The element currently
/// represented is SubTypes.back()->getTypeAtIndex(Path.back() - 1).
///
/// @param Path Set of extractvalue indices leading from the outermost type
/// (SubTypes[0]) to the leaf node currently represented.
///
/// @returns true if a new type was found, false otherwise. Calling this
/// function again on a finished iterator will repeatedly return false.
/// SubTypes.back()->getTypeAtIndex(Path.back()) is either an empty aggregate
/// or a non-aggregate type.
static bool advanceToNextLeafType(SmallVectorImpl<Type *> &SubTypes,
                                  SmallVectorImpl<unsigned> &Path) {
  // First march back up the tree until we can successfully increment one of
  // the coordinates in Path.
  while (!Path.empty() && !indexReallyValid(SubTypes.back(), Path.back() + 1)) {
    Path.pop_back();
    SubTypes.pop_back();
  }

  // If we reached the top, then the iterator is done.
  if (Path.empty())
    return false;

  // We know there's *some* valid leaf now, so march back down the tree picking
  // out the left-most element at each node.
  ++Path.back();
  Type *DeeperType =
      ExtractValueInst::getIndexedType(SubTypes.back(), Path.back());
  while (DeeperType->isAggregateType()) {
    if (!indexReallyValid(DeeperType, 0))
      return true;

    SubTypes.push_back(DeeperType);
    Path.push_back(0);

    DeeperType = ExtractValueInst::getIndexedType(DeeperType, 0);
  }

  return true;
}
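
// Example (illustrative): for { i32, { float, [2 x i8] } } the successive
// leaves visited are the i32 (Path = [0]), the float (Path = [1, 0]) and the
// two i8 elements (Path = [1, 1, 0], then [1, 1, 1]); one more call pops back
// to the root and returns false.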

/// Find the first non-empty, scalar-like type in Next and setup the iterator
/// components.
///
/// Assuming Next is an aggregate of some kind, this function will traverse the
/// tree from left to right (i.e. depth-first) looking for the first
/// non-aggregate type which will play a role in function return.
///
/// For example, if Next was {[0 x i64], {{}, i32, {}}, i32} then we would setup
/// Path as [1, 1] and SubTypes as [Next, {{}, i32, {}}] to represent the first
/// i32 in that type.
static bool firstRealType(Type *Next, SmallVectorImpl<Type *> &SubTypes,
                          SmallVectorImpl<unsigned> &Path) {
  // First initialise the iterator components to the first "leaf" node
  // (i.e. node with no valid sub-type at any index, so {} does count as a leaf
  // despite nominally being an aggregate).
  while (Type *FirstInner = ExtractValueInst::getIndexedType(Next, 0)) {
    SubTypes.push_back(Next);
    Path.push_back(0);
    Next = FirstInner;
  }

  // If there's no Path now, Next was originally scalar already (or an empty
  // leaf). We're done.
  if (Path.empty())
    return true;

  // Otherwise, use normal iteration to keep looking through the tree until we
  // find a non-aggregate type.
  while (ExtractValueInst::getIndexedType(SubTypes.back(), Path.back())
             ->isAggregateType()) {
    if (!advanceToNextLeafType(SubTypes, Path))
      return false;
  }

  return true;
}

/// Set the iterator data-structures to the next non-empty, non-aggregate
/// subtype.
static bool nextRealType(SmallVectorImpl<Type *> &SubTypes,
                         SmallVectorImpl<unsigned> &Path) {
  do {
    if (!advanceToNextLeafType(SubTypes, Path))
      return false;

    assert(!Path.empty() && "found a leaf but didn't set the path?");
  } while (ExtractValueInst::getIndexedType(SubTypes.back(), Path.back())
               ->isAggregateType());

  return true;
}

/// Test if the given instruction is in a position to be optimized
/// with a tail-call. This roughly means that it's in a block with
/// a return and there's nothing that needs to be scheduled
/// between it and the return.
///
/// This function only tests target-independent requirements.
bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM) {
  const BasicBlock *ExitBB = Call.getParent();
  const Instruction *Term = ExitBB->getTerminator();
  const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);

  // The block must end in a return statement or unreachable.
  //
  // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
  // an unreachable, for now. The way tailcall optimization is currently
  // implemented means it will add an epilogue followed by a jump. That is
  // not profitable. Also, if the callee is a special function (e.g.
  // longjmp on x86), it can end up causing miscompilation that has not
  // been fully understood.
  if (!Ret && ((!TM.Options.GuaranteedTailCallOpt &&
                Call.getCallingConv() != CallingConv::Tail &&
                Call.getCallingConv() != CallingConv::SwiftTail) ||
               !isa<UnreachableInst>(Term)))
    return false;

  // If I will have a chain, make sure no other instruction that will have a
  // chain interposes between I and the return.
  // Check for all calls including speculatable functions.
  for (BasicBlock::const_iterator BBI = std::prev(ExitBB->end(), 2);; --BBI) {
    if (&*BBI == &Call)
      break;
    // Debug info intrinsics do not get in the way of tail call optimization.
    // Pseudo probe intrinsics do not block tail call optimization either.
    if (BBI->isDebugOrPseudoInst())
      continue;
    // A lifetime end, assume or noalias.decl intrinsic should not stop tail
    // call optimization.
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI))
      if (II->getIntrinsicID() == Intrinsic::lifetime_end ||
          II->getIntrinsicID() == Intrinsic::assume ||
          II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl)
        continue;
    if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
        !isSafeToSpeculativelyExecute(&*BBI))
      return false;
  }

  const Function *F = ExitBB->getParent();
  return returnTypeIsEligibleForTailCall(
      F, &Call, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering());
}
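
// Example (illustrative, hypothetical IR): the call below is in tail call
// position, since only the return uses its result and nothing with side
// effects sits between the call and the ret:
//
//   define i32 @caller() {
//     %r = tail call i32 @callee()
//     ret i32 %r
//   }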

bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
                                    const ReturnInst *Ret,
                                    const TargetLoweringBase &TLI,
                                    bool *AllowDifferingSizes) {
  // ADS may be null, so don't write to it directly.
  bool DummyADS;
  bool &ADS = AllowDifferingSizes ? *AllowDifferingSizes : DummyADS;
  ADS = true;

  AttrBuilder CallerAttrs(F->getContext(), F->getAttributes().getRetAttrs());
  AttrBuilder CalleeAttrs(F->getContext(),
                          cast<CallInst>(I)->getAttributes().getRetAttrs());

  // The following attributes are completely benign as far as calling
  // convention goes; they shouldn't affect whether the call is a tail call.
  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
                           Attribute::NonNull, Attribute::NoUndef}) {
    CallerAttrs.removeAttribute(Attr);
    CalleeAttrs.removeAttribute(Attr);
  }

  if (CallerAttrs.contains(Attribute::ZExt)) {
    if (!CalleeAttrs.contains(Attribute::ZExt))
      return false;

    ADS = false;
    CallerAttrs.removeAttribute(Attribute::ZExt);
    CalleeAttrs.removeAttribute(Attribute::ZExt);
  } else if (CallerAttrs.contains(Attribute::SExt)) {
    if (!CalleeAttrs.contains(Attribute::SExt))
      return false;

    ADS = false;
    CallerAttrs.removeAttribute(Attribute::SExt);
    CalleeAttrs.removeAttribute(Attribute::SExt);
  }

  // Drop sext and zext return attributes if the result is not used.
  // This enables tail calls for code like:
  //
  // define void @caller() {
  // entry:
  //   %unused_result = tail call zeroext i1 @callee()
  //   br label %retlabel
  // retlabel:
  //   ret void
  // }
  if (I->use_empty()) {
    CalleeAttrs.removeAttribute(Attribute::SExt);
    CalleeAttrs.removeAttribute(Attribute::ZExt);
  }

  // If they're still different, there's some facet we don't understand
  // (currently only "inreg", but in future who knows). It may be OK but the
  // only safe option is to reject the tail call.
  return CallerAttrs == CalleeAttrs;
}
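
// Example (illustrative): if the caller's return is marked zeroext, the
// callee's must be too, and AllowDifferingSizes is then cleared so the bit
// widths must match exactly:
//
//   define zeroext i8 @caller() {
//     %r = tail call zeroext i8 @callee() ; compatible attributes, OK
//     ret i8 %r
//   }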

/// Check whether B is a bitcast of a pointer type to another pointer type,
/// which is equal to A.
static bool isPointerBitcastEqualTo(const Value *A, const Value *B) {
  assert(A && B && "Expected non-null inputs!");

  auto *BitCastIn = dyn_cast<BitCastInst>(B);

  if (!BitCastIn)
    return false;

  if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy())
    return false;

  return A == BitCastIn->getOperand(0);
}

bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
                                           const Instruction *I,
                                           const ReturnInst *Ret,
                                           const TargetLoweringBase &TLI) {
  // If the block ends with a void return or unreachable, it doesn't matter
  // what the call's return type is.
  if (!Ret || Ret->getNumOperands() == 0) return true;

  // If the return value is undef, it doesn't matter what the call's
  // return type is.
  if (isa<UndefValue>(Ret->getOperand(0))) return true;

  // Make sure the attributes attached to each return are compatible.
  bool AllowDifferingSizes;
  if (!attributesPermitTailCall(F, I, Ret, TLI, &AllowDifferingSizes))
    return false;

  const Value *RetVal = Ret->getOperand(0), *CallVal = I;
  // Intrinsics like llvm.memcpy have no return value, but the expanded
  // libcall may or may not have one. On most platforms, it will be expanded
  // as memcpy in libc, which returns the first argument. On other platforms
  // like arm-none-eabi, memcpy may be expanded as a library call without a
  // return value, like __aeabi_memcpy.
  const CallInst *Call = cast<CallInst>(I);
  if (Function *F = Call->getCalledFunction()) {
    Intrinsic::ID IID = F->getIntrinsicID();
    if (((IID == Intrinsic::memcpy &&
          TLI.getLibcallName(RTLIB::MEMCPY) == StringRef("memcpy")) ||
         (IID == Intrinsic::memmove &&
          TLI.getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove")) ||
         (IID == Intrinsic::memset &&
          TLI.getLibcallName(RTLIB::MEMSET) == StringRef("memset"))) &&
        (RetVal == Call->getArgOperand(0) ||
         isPointerBitcastEqualTo(RetVal, Call->getArgOperand(0))))
      return true;
  }

  SmallVector<unsigned, 4> RetPath, CallPath;
  SmallVector<Type *, 4> RetSubTypes, CallSubTypes;

  bool RetEmpty = !firstRealType(RetVal->getType(), RetSubTypes, RetPath);
  bool CallEmpty = !firstRealType(CallVal->getType(), CallSubTypes, CallPath);

  // Nothing's actually returned; it doesn't matter what the callee puts
  // there, it's a valid tail call.
  if (RetEmpty)
    return true;

  // Iterate pairwise through each of the value types making up the tail call
  // and the corresponding return. For each one we want to know whether it's
  // essentially going directly from the tail call to the ret, via operations
  // that end up not generating any code.
  //
  // We allow a certain amount of covariance here. For example it's permitted
  // for the tail call to define more bits than the ret actually cares about
  // (e.g. via a truncate).
  do {
    if (CallEmpty) {
      // We've exhausted the values produced by the tail call instruction, the
      // rest are essentially undef. The type doesn't really matter, but we
      // need *something*.
      Type *SlotType =
          ExtractValueInst::getIndexedType(RetSubTypes.back(), RetPath.back());
      CallVal = UndefValue::get(SlotType);
    }

    // The manipulations performed when we're looking through an insertvalue or
    // an extractvalue would happen at the front of the RetPath list, so since
    // we have to copy it anyway it's more efficient to create a reversed copy.
    SmallVector<unsigned, 4> TmpRetPath(llvm::reverse(RetPath));
    SmallVector<unsigned, 4> TmpCallPath(llvm::reverse(CallPath));

    // Finally, we can check whether the value produced by the tail call at
    // this index is compatible with the value we return.
    if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath,
                              AllowDifferingSizes, TLI,
                              F->getParent()->getDataLayout()))
      return false;

    CallEmpty = !nextRealType(CallSubTypes, CallPath);
  } while (nextRealType(RetSubTypes, RetPath));

  return true;
}
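
// Example (illustrative, hypothetical IR): values may also flow through free
// aggregate manipulations; here the ret traces back to the call through an
// extractvalue, so the return type is still eligible:
//
//   %pair = tail call { i32, i32 } @f()
//   %val = extractvalue { i32, i32 } %pair, 0
//   ret i32 %val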

static void collectEHScopeMembers(
    DenseMap<const MachineBasicBlock *, int> &EHScopeMembership, int EHScope,
    const MachineBasicBlock *MBB) {
  SmallVector<const MachineBasicBlock *, 16> Worklist = {MBB};
  while (!Worklist.empty()) {
    const MachineBasicBlock *Visiting = Worklist.pop_back_val();
    // Don't follow blocks which start new scopes.
    if (Visiting->isEHPad() && Visiting != MBB)
      continue;

    // Add this MBB to our scope.
    auto P = EHScopeMembership.insert(std::make_pair(Visiting, EHScope));

    // Don't revisit blocks.
    if (!P.second) {
      assert(P.first->second == EHScope && "MBB is part of two scopes!");
      continue;
    }

    // Returns are boundaries where scope transfer can occur, don't follow
    // successors.
    if (Visiting->isEHScopeReturnBlock())
      continue;

    append_range(Worklist, Visiting->successors());
  }
}

DenseMap<const MachineBasicBlock *, int>
llvm::getEHScopeMembership(const MachineFunction &MF) {
  DenseMap<const MachineBasicBlock *, int> EHScopeMembership;

  // We don't have anything to do if there aren't any EH pads.
  if (!MF.hasEHScopes())
    return EHScopeMembership;

  int EntryBBNumber = MF.front().getNumber();
  bool IsSEH = isAsynchronousEHPersonality(
      classifyEHPersonality(MF.getFunction().getPersonalityFn()));

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  SmallVector<const MachineBasicBlock *, 16> EHScopeBlocks;
  SmallVector<const MachineBasicBlock *, 16> UnreachableBlocks;
  SmallVector<const MachineBasicBlock *, 16> SEHCatchPads;
  SmallVector<std::pair<const MachineBasicBlock *, int>, 16> CatchRetSuccessors;
  for (const MachineBasicBlock &MBB : MF) {
    if (MBB.isEHScopeEntry()) {
      EHScopeBlocks.push_back(&MBB);
    } else if (IsSEH && MBB.isEHPad()) {
      SEHCatchPads.push_back(&MBB);
    } else if (MBB.pred_empty()) {
      UnreachableBlocks.push_back(&MBB);
    }

    MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator();

    // CatchPads are not scopes for SEH so do not consider CatchRet to
    // transfer control to another scope.
    if (MBBI == MBB.end() || MBBI->getOpcode() != TII->getCatchReturnOpcode())
      continue;

    // FIXME: SEH CatchPads are not necessarily in the parent function:
    // they could be inside a finally block.
    const MachineBasicBlock *Successor = MBBI->getOperand(0).getMBB();
    const MachineBasicBlock *SuccessorColor = MBBI->getOperand(1).getMBB();
    CatchRetSuccessors.push_back(
        {Successor, IsSEH ? EntryBBNumber : SuccessorColor->getNumber()});
  }

  // We don't have anything to do if there aren't any EH pads.
  if (EHScopeBlocks.empty())
    return EHScopeMembership;

  // Identify all the basic blocks reachable from the function entry.
  collectEHScopeMembers(EHScopeMembership, EntryBBNumber, &MF.front());
  // All blocks not part of a scope are in the parent function.
  for (const MachineBasicBlock *MBB : UnreachableBlocks)
    collectEHScopeMembers(EHScopeMembership, EntryBBNumber, MBB);
  // Next, identify all the blocks inside the scopes.
  for (const MachineBasicBlock *MBB : EHScopeBlocks)
    collectEHScopeMembers(EHScopeMembership, MBB->getNumber(), MBB);
  // SEH CatchPads aren't really scopes, handle them separately.
  for (const MachineBasicBlock *MBB : SEHCatchPads)
    collectEHScopeMembers(EHScopeMembership, EntryBBNumber, MBB);
  // Finally, identify all the targets of a catchret.
  for (std::pair<const MachineBasicBlock *, int> CatchRetPair :
       CatchRetSuccessors)
    collectEHScopeMembers(EHScopeMembership, CatchRetPair.second,
                          CatchRetPair.first);
  return EHScopeMembership;
}
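
// Example (hedged usage sketch, assuming a MachineFunction MF with EH scopes):
// after the flood fills above, membership can be queried per block:
//
//   DenseMap<const MachineBasicBlock *, int> Scopes = getEHScopeMembership(MF);
//   for (const MachineBasicBlock &MBB : MF)
//     if (auto It = Scopes.find(&MBB); It != Scopes.end())
//       dbgs() << "bb." << MBB.getNumber() << " -> scope " << It->second
//              << "\n";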