LLVM 20.0.0git
InstCombineLoadStoreAlloca.cpp
Go to the documentation of this file.
1//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visit functions for load, store and alloca.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/Statistic.h"
19#include "llvm/Analysis/Loads.h"
20#include "llvm/IR/DataLayout.h"
23#include "llvm/IR/LLVMContext.h"
27using namespace llvm;
28using namespace PatternMatch;
29
30#define DEBUG_TYPE "instcombine"
31
32STATISTIC(NumDeadStore, "Number of dead stores eliminated");
33STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
34
36 "instcombine-max-copied-from-constant-users", cl::init(300),
37 cl::desc("Maximum users to visit in copy from constant transform"),
39
40namespace llvm {
42 "enable-infer-alignment-pass", cl::init(true), cl::Hidden, cl::ZeroOrMore,
43 cl::desc("Enable the InferAlignment pass, disabling alignment inference in "
44 "InstCombine"));
45}
46
47/// isOnlyCopiedFromConstantMemory - Recursively walk the uses of a (derived)
48/// pointer to an alloca. Ignore any reads of the pointer, return false if we
49/// see any stores or other unknown uses. If we see pointer arithmetic, keep
50/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
51/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
52/// the alloca, and if the source pointer is a pointer to a constant memory
53/// location, we can optimize this.
54static bool
56 MemTransferInst *&TheCopy,
58 // We track lifetime intrinsics as we encounter them. If we decide to go
59 // ahead and replace the value with the memory location, this lets the caller
60 // quickly eliminate the markers.
61
62 using ValueAndIsOffset = PointerIntPair<Value *, 1, bool>;
65 Worklist.emplace_back(V, false);
66 while (!Worklist.empty()) {
67 ValueAndIsOffset Elem = Worklist.pop_back_val();
68 if (!Visited.insert(Elem).second)
69 continue;
70 if (Visited.size() > MaxCopiedFromConstantUsers)
71 return false;
72
73 const auto [Value, IsOffset] = Elem;
74 for (auto &U : Value->uses()) {
75 auto *I = cast<Instruction>(U.getUser());
76
77 if (auto *LI = dyn_cast<LoadInst>(I)) {
78 // Ignore non-volatile loads, they are always ok.
79 if (!LI->isSimple()) return false;
80 continue;
81 }
82
83 if (isa<PHINode, SelectInst>(I)) {
84 // We set IsOffset=true, to forbid the memcpy from occurring after the
85 // phi: If one of the phi operands is not based on the alloca, we
86 // would incorrectly omit a write.
87 Worklist.emplace_back(I, true);
88 continue;
89 }
90 if (isa<BitCastInst, AddrSpaceCastInst>(I)) {
91 // If uses of the bitcast are ok, we are ok.
92 Worklist.emplace_back(I, IsOffset);
93 continue;
94 }
95 if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
96 // If the GEP has all zero indices, it doesn't offset the pointer. If it
97 // doesn't, it does.
98 Worklist.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices());
99 continue;
100 }
101
102 if (auto *Call = dyn_cast<CallBase>(I)) {
103 // If this is the function being called then we treat it like a load and
104 // ignore it.
105 if (Call->isCallee(&U))
106 continue;
107
108 unsigned DataOpNo = Call->getDataOperandNo(&U);
109 bool IsArgOperand = Call->isArgOperand(&U);
110
111 // Inalloca arguments are clobbered by the call.
112 if (IsArgOperand && Call->isInAllocaArgument(DataOpNo))
113 return false;
114
115 // If this call site doesn't modify the memory, then we know it is just
116 // a load (but one that potentially returns the value itself), so we can
117 // ignore it if we know that the value isn't captured.
118 bool NoCapture = Call->doesNotCapture(DataOpNo);
119 if ((Call->onlyReadsMemory() && (Call->use_empty() || NoCapture)) ||
120 (Call->onlyReadsMemory(DataOpNo) && NoCapture))
121 continue;
122
123 // If this is being passed as a byval argument, the caller is making a
124 // copy, so it is only a read of the alloca.
125 if (IsArgOperand && Call->isByValArgument(DataOpNo))
126 continue;
127 }
128
129 // Lifetime intrinsics can be handled by the caller.
130 if (I->isLifetimeStartOrEnd()) {
131 assert(I->use_empty() && "Lifetime markers have no result to use!");
132 ToDelete.push_back(I);
133 continue;
134 }
135
136 // If this is isn't our memcpy/memmove, reject it as something we can't
137 // handle.
138 MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
139 if (!MI)
140 return false;
141
142 // If the transfer is volatile, reject it.
143 if (MI->isVolatile())
144 return false;
145
146 // If the transfer is using the alloca as a source of the transfer, then
147 // ignore it since it is a load (unless the transfer is volatile).
148 if (U.getOperandNo() == 1)
149 continue;
150
151 // If we already have seen a copy, reject the second one.
152 if (TheCopy) return false;
153
154 // If the pointer has been offset from the start of the alloca, we can't
155 // safely handle this.
156 if (IsOffset) return false;
157
158 // If the memintrinsic isn't using the alloca as the dest, reject it.
159 if (U.getOperandNo() != 0) return false;
160
161 // If the source of the memcpy/move is not constant, reject it.
162 if (isModSet(AA->getModRefInfoMask(MI->getSource())))
163 return false;
164
165 // Otherwise, the transform is safe. Remember the copy instruction.
166 TheCopy = MI;
167 }
168 }
169 return true;
170}
171
172/// isOnlyCopiedFromConstantMemory - Return true if the specified alloca is only
173/// modified by a copy from a constant memory location. If we can prove this, we
174/// can replace any uses of the alloca with uses of the memory location
175/// directly.
176static MemTransferInst *
178 AllocaInst *AI,
180 MemTransferInst *TheCopy = nullptr;
181 if (isOnlyCopiedFromConstantMemory(AA, AI, TheCopy, ToDelete))
182 return TheCopy;
183 return nullptr;
184}
185
186/// Returns true if V is dereferenceable for size of alloca.
187static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
188 const DataLayout &DL) {
189 if (AI->isArrayAllocation())
190 return false;
191 uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
192 if (!AllocaSize)
193 return false;
195 APInt(64, AllocaSize), DL);
196}
197
199 AllocaInst &AI, DominatorTree &DT) {
200 // Check for array size of 1 (scalar allocation).
201 if (!AI.isArrayAllocation()) {
202 // i32 1 is the canonical array size for scalar allocations.
203 if (AI.getArraySize()->getType()->isIntegerTy(32))
204 return nullptr;
205
206 // Canonicalize it.
207 return IC.replaceOperand(AI, 0, IC.Builder.getInt32(1));
208 }
209
210 // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
211 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
212 if (C->getValue().getActiveBits() <= 64) {
213 Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
214 AllocaInst *New = IC.Builder.CreateAlloca(NewTy, AI.getAddressSpace(),
215 nullptr, AI.getName());
216 New->setAlignment(AI.getAlign());
217 New->setUsedWithInAlloca(AI.isUsedWithInAlloca());
218
219 replaceAllDbgUsesWith(AI, *New, *New, DT);
220 return IC.replaceInstUsesWith(AI, New);
221 }
222 }
223
224 if (isa<UndefValue>(AI.getArraySize()))
226
227 // Ensure that the alloca array size argument has type equal to the offset
228 // size of the alloca() pointer, which, in the tyical case, is intptr_t,
229 // so that any casting is exposed early.
230 Type *PtrIdxTy = IC.getDataLayout().getIndexType(AI.getType());
231 if (AI.getArraySize()->getType() != PtrIdxTy) {
232 Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), PtrIdxTy, false);
233 return IC.replaceOperand(AI, 0, V);
234 }
235
236 return nullptr;
237}
238
239namespace {
240// If I and V are pointers in different address space, it is not allowed to
241// use replaceAllUsesWith since I and V have different types. A
242// non-target-specific transformation should not use addrspacecast on V since
243// the two address space may be disjoint depending on target.
244//
245// This class chases down uses of the old pointer until reaching the load
246// instructions, then replaces the old pointer in the load instructions with
247// the new pointer. If during the chasing it sees bitcast or GEP, it will
248// create new bitcast or GEP with the new pointer and use them in the load
249// instruction.
250class PointerReplacer {
251public:
252 PointerReplacer(InstCombinerImpl &IC, Instruction &Root, unsigned SrcAS)
253 : IC(IC), Root(Root), FromAS(SrcAS) {}
254
255 bool collectUsers();
256 void replacePointer(Value *V);
257
258private:
259 bool collectUsersRecursive(Instruction &I);
260 void replace(Instruction *I);
261 Value *getReplacement(Value *I);
262 bool isAvailable(Instruction *I) const {
263 return I == &Root || Worklist.contains(I);
264 }
265
266 bool isEqualOrValidAddrSpaceCast(const Instruction *I,
267 unsigned FromAS) const {
268 const auto *ASC = dyn_cast<AddrSpaceCastInst>(I);
269 if (!ASC)
270 return false;
271 unsigned ToAS = ASC->getDestAddressSpace();
272 return (FromAS == ToAS) || IC.isValidAddrSpaceCast(FromAS, ToAS);
273 }
274
275 SmallPtrSet<Instruction *, 32> ValuesToRevisit;
279 Instruction &Root;
280 unsigned FromAS;
281};
282} // end anonymous namespace
283
284bool PointerReplacer::collectUsers() {
285 if (!collectUsersRecursive(Root))
286 return false;
287
288 // Ensure that all outstanding (indirect) users of I
289 // are inserted into the Worklist. Return false
290 // otherwise.
291 return llvm::set_is_subset(ValuesToRevisit, Worklist);
292}
293
294bool PointerReplacer::collectUsersRecursive(Instruction &I) {
295 for (auto *U : I.users()) {
296 auto *Inst = cast<Instruction>(&*U);
297 if (auto *Load = dyn_cast<LoadInst>(Inst)) {
298 if (Load->isVolatile())
299 return false;
300 Worklist.insert(Load);
301 } else if (auto *PHI = dyn_cast<PHINode>(Inst)) {
302 // All incoming values must be instructions for replacability
303 if (any_of(PHI->incoming_values(),
304 [](Value *V) { return !isa<Instruction>(V); }))
305 return false;
306
307 // If at least one incoming value of the PHI is not in Worklist,
308 // store the PHI for revisiting and skip this iteration of the
309 // loop.
310 if (any_of(PHI->incoming_values(), [this](Value *V) {
311 return !isAvailable(cast<Instruction>(V));
312 })) {
313 ValuesToRevisit.insert(Inst);
314 continue;
315 }
316
317 Worklist.insert(PHI);
318 if (!collectUsersRecursive(*PHI))
319 return false;
320 } else if (auto *SI = dyn_cast<SelectInst>(Inst)) {
321 if (!isa<Instruction>(SI->getTrueValue()) ||
322 !isa<Instruction>(SI->getFalseValue()))
323 return false;
324
325 if (!isAvailable(cast<Instruction>(SI->getTrueValue())) ||
326 !isAvailable(cast<Instruction>(SI->getFalseValue()))) {
327 ValuesToRevisit.insert(Inst);
328 continue;
329 }
330 Worklist.insert(SI);
331 if (!collectUsersRecursive(*SI))
332 return false;
333 } else if (isa<GetElementPtrInst>(Inst)) {
334 Worklist.insert(Inst);
335 if (!collectUsersRecursive(*Inst))
336 return false;
337 } else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) {
338 if (MI->isVolatile())
339 return false;
340 Worklist.insert(Inst);
341 } else if (isEqualOrValidAddrSpaceCast(Inst, FromAS)) {
342 Worklist.insert(Inst);
343 if (!collectUsersRecursive(*Inst))
344 return false;
345 } else if (Inst->isLifetimeStartOrEnd()) {
346 continue;
347 } else {
348 // TODO: For arbitrary uses with address space mismatches, should we check
349 // if we can introduce a valid addrspacecast?
350 LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *U << '\n');
351 return false;
352 }
353 }
354
355 return true;
356}
357
358Value *PointerReplacer::getReplacement(Value *V) { return WorkMap.lookup(V); }
359
360void PointerReplacer::replace(Instruction *I) {
361 if (getReplacement(I))
362 return;
363
364 if (auto *LT = dyn_cast<LoadInst>(I)) {
365 auto *V = getReplacement(LT->getPointerOperand());
366 assert(V && "Operand not replaced");
367 auto *NewI = new LoadInst(LT->getType(), V, "", LT->isVolatile(),
368 LT->getAlign(), LT->getOrdering(),
369 LT->getSyncScopeID());
370 NewI->takeName(LT);
371 copyMetadataForLoad(*NewI, *LT);
372
373 IC.InsertNewInstWith(NewI, LT->getIterator());
374 IC.replaceInstUsesWith(*LT, NewI);
375 WorkMap[LT] = NewI;
376 } else if (auto *PHI = dyn_cast<PHINode>(I)) {
377 Type *NewTy = getReplacement(PHI->getIncomingValue(0))->getType();
378 auto *NewPHI = PHINode::Create(NewTy, PHI->getNumIncomingValues(),
379 PHI->getName(), PHI->getIterator());
380 for (unsigned int I = 0; I < PHI->getNumIncomingValues(); ++I)
381 NewPHI->addIncoming(getReplacement(PHI->getIncomingValue(I)),
382 PHI->getIncomingBlock(I));
383 WorkMap[PHI] = NewPHI;
384 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
385 auto *V = getReplacement(GEP->getPointerOperand());
386 assert(V && "Operand not replaced");
387 SmallVector<Value *, 8> Indices(GEP->indices());
388 auto *NewI =
389 GetElementPtrInst::Create(GEP->getSourceElementType(), V, Indices);
390 IC.InsertNewInstWith(NewI, GEP->getIterator());
391 NewI->takeName(GEP);
392 NewI->setNoWrapFlags(GEP->getNoWrapFlags());
393 WorkMap[GEP] = NewI;
394 } else if (auto *SI = dyn_cast<SelectInst>(I)) {
395 Value *TrueValue = SI->getTrueValue();
396 Value *FalseValue = SI->getFalseValue();
397 if (Value *Replacement = getReplacement(TrueValue))
398 TrueValue = Replacement;
399 if (Value *Replacement = getReplacement(FalseValue))
400 FalseValue = Replacement;
401 auto *NewSI = SelectInst::Create(SI->getCondition(), TrueValue, FalseValue,
402 SI->getName(), nullptr, SI);
403 IC.InsertNewInstWith(NewSI, SI->getIterator());
404 NewSI->takeName(SI);
405 WorkMap[SI] = NewSI;
406 } else if (auto *MemCpy = dyn_cast<MemTransferInst>(I)) {
407 auto *DestV = MemCpy->getRawDest();
408 auto *SrcV = MemCpy->getRawSource();
409
410 if (auto *DestReplace = getReplacement(DestV))
411 DestV = DestReplace;
412 if (auto *SrcReplace = getReplacement(SrcV))
413 SrcV = SrcReplace;
414
415 IC.Builder.SetInsertPoint(MemCpy);
416 auto *NewI = IC.Builder.CreateMemTransferInst(
417 MemCpy->getIntrinsicID(), DestV, MemCpy->getDestAlign(), SrcV,
418 MemCpy->getSourceAlign(), MemCpy->getLength(), MemCpy->isVolatile());
419 AAMDNodes AAMD = MemCpy->getAAMetadata();
420 if (AAMD)
421 NewI->setAAMetadata(AAMD);
422
423 IC.eraseInstFromFunction(*MemCpy);
424 WorkMap[MemCpy] = NewI;
425 } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(I)) {
426 auto *V = getReplacement(ASC->getPointerOperand());
427 assert(V && "Operand not replaced");
428 assert(isEqualOrValidAddrSpaceCast(
429 ASC, V->getType()->getPointerAddressSpace()) &&
430 "Invalid address space cast!");
431
432 if (V->getType()->getPointerAddressSpace() !=
433 ASC->getType()->getPointerAddressSpace()) {
434 auto *NewI = new AddrSpaceCastInst(V, ASC->getType(), "");
435 NewI->takeName(ASC);
436 IC.InsertNewInstWith(NewI, ASC->getIterator());
437 WorkMap[ASC] = NewI;
438 } else {
439 WorkMap[ASC] = V;
440 }
441
442 } else {
443 llvm_unreachable("should never reach here");
444 }
445}
446
447void PointerReplacer::replacePointer(Value *V) {
448#ifndef NDEBUG
449 auto *PT = cast<PointerType>(Root.getType());
450 auto *NT = cast<PointerType>(V->getType());
451 assert(PT != NT && "Invalid usage");
452#endif
453 WorkMap[&Root] = V;
454
455 for (Instruction *Workitem : Worklist)
456 replace(Workitem);
457}
458
460 if (auto *I = simplifyAllocaArraySize(*this, AI, DT))
461 return I;
462
463 if (AI.getAllocatedType()->isSized()) {
464 // Move all alloca's of zero byte objects to the entry block and merge them
465 // together. Note that we only do this for alloca's, because malloc should
466 // allocate and return a unique pointer, even for a zero byte allocation.
468 // For a zero sized alloca there is no point in doing an array allocation.
469 // This is helpful if the array size is a complicated expression not used
470 // elsewhere.
471 if (AI.isArrayAllocation())
472 return replaceOperand(AI, 0,
473 ConstantInt::get(AI.getArraySize()->getType(), 1));
474
475 // Get the first instruction in the entry block.
476 BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
477 Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
478 if (FirstInst != &AI) {
479 // If the entry block doesn't start with a zero-size alloca then move
480 // this one to the start of the entry block. There is no problem with
481 // dominance as the array size was forced to a constant earlier already.
482 AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
483 if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
485 .getKnownMinValue() != 0) {
486 AI.moveBefore(FirstInst);
487 return &AI;
488 }
489
490 // Replace this zero-sized alloca with the one at the start of the entry
491 // block after ensuring that the address will be aligned enough for both
492 // types.
493 const Align MaxAlign = std::max(EntryAI->getAlign(), AI.getAlign());
494 EntryAI->setAlignment(MaxAlign);
495 return replaceInstUsesWith(AI, EntryAI);
496 }
497 }
498 }
499
500 // Check to see if this allocation is only modified by a memcpy/memmove from
501 // a memory location whose alignment is equal to or exceeds that of the
502 // allocation. If this is the case, we can change all users to use the
503 // constant memory location instead. This is commonly produced by the CFE by
504 // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
505 // is only subsequently read.
507 if (MemTransferInst *Copy = isOnlyCopiedFromConstantMemory(AA, &AI, ToDelete)) {
508 Value *TheSrc = Copy->getSource();
509 Align AllocaAlign = AI.getAlign();
510 Align SourceAlign = getOrEnforceKnownAlignment(
511 TheSrc, AllocaAlign, DL, &AI, &AC, &DT);
512 if (AllocaAlign <= SourceAlign &&
513 isDereferenceableForAllocaSize(TheSrc, &AI, DL) &&
514 !isa<Instruction>(TheSrc)) {
515 // FIXME: Can we sink instructions without violating dominance when TheSrc
516 // is an instruction instead of a constant or argument?
517 LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
518 LLVM_DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
519 unsigned SrcAddrSpace = TheSrc->getType()->getPointerAddressSpace();
520 if (AI.getAddressSpace() == SrcAddrSpace) {
521 for (Instruction *Delete : ToDelete)
522 eraseInstFromFunction(*Delete);
523
524 Instruction *NewI = replaceInstUsesWith(AI, TheSrc);
526 ++NumGlobalCopies;
527 return NewI;
528 }
529
530 PointerReplacer PtrReplacer(*this, AI, SrcAddrSpace);
531 if (PtrReplacer.collectUsers()) {
532 for (Instruction *Delete : ToDelete)
533 eraseInstFromFunction(*Delete);
534
535 PtrReplacer.replacePointer(TheSrc);
536 ++NumGlobalCopies;
537 }
538 }
539 }
540
541 // At last, use the generic allocation site handler to aggressively remove
542 // unused allocas.
543 return visitAllocSite(AI);
544}
545
546// Are we allowed to form a atomic load or store of this type?
547static bool isSupportedAtomicType(Type *Ty) {
548 return Ty->isIntOrPtrTy() || Ty->isFloatingPointTy();
549}
550
551/// Helper to combine a load to a new type.
552///
553/// This just does the work of combining a load to a new type. It handles
554/// metadata, etc., and returns the new instruction. The \c NewTy should be the
555/// loaded *value* type. This will convert it to a pointer, cast the operand to
556/// that pointer type, load it, etc.
557///
558/// Note that this will create all of the instructions with whatever insert
559/// point the \c InstCombinerImpl currently is using.
561 const Twine &Suffix) {
562 assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
563 "can't fold an atomic load to requested type");
564
565 LoadInst *NewLoad =
566 Builder.CreateAlignedLoad(NewTy, LI.getPointerOperand(), LI.getAlign(),
567 LI.isVolatile(), LI.getName() + Suffix);
568 NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
569 copyMetadataForLoad(*NewLoad, LI);
570 return NewLoad;
571}
572
573/// Combine a store to a new type.
574///
575/// Returns the newly created store instruction.
577 Value *V) {
578 assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
579 "can't fold an atomic store of requested type");
580
581 Value *Ptr = SI.getPointerOperand();
583 SI.getAllMetadata(MD);
584
585 StoreInst *NewStore =
586 IC.Builder.CreateAlignedStore(V, Ptr, SI.getAlign(), SI.isVolatile());
587 NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
588 for (const auto &MDPair : MD) {
589 unsigned ID = MDPair.first;
590 MDNode *N = MDPair.second;
591 // Note, essentially every kind of metadata should be preserved here! This
592 // routine is supposed to clone a store instruction changing *only its
593 // type*. The only metadata it makes sense to drop is metadata which is
594 // invalidated when the pointer type changes. This should essentially
595 // never be the case in LLVM, but we explicitly switch over only known
596 // metadata to be conservatively correct. If you are adding metadata to
597 // LLVM which pertains to stores, you almost certainly want to add it
598 // here.
599 switch (ID) {
600 case LLVMContext::MD_dbg:
601 case LLVMContext::MD_DIAssignID:
602 case LLVMContext::MD_tbaa:
603 case LLVMContext::MD_prof:
604 case LLVMContext::MD_fpmath:
605 case LLVMContext::MD_tbaa_struct:
606 case LLVMContext::MD_alias_scope:
607 case LLVMContext::MD_noalias:
608 case LLVMContext::MD_nontemporal:
609 case LLVMContext::MD_mem_parallel_loop_access:
610 case LLVMContext::MD_access_group:
611 // All of these directly apply.
612 NewStore->setMetadata(ID, N);
613 break;
614 case LLVMContext::MD_invariant_load:
615 case LLVMContext::MD_nonnull:
616 case LLVMContext::MD_noundef:
617 case LLVMContext::MD_range:
618 case LLVMContext::MD_align:
619 case LLVMContext::MD_dereferenceable:
620 case LLVMContext::MD_dereferenceable_or_null:
621 // These don't apply for stores.
622 break;
623 }
624 }
625
626 return NewStore;
627}
628
629/// Combine loads to match the type of their uses' value after looking
630/// through intervening bitcasts.
631///
632/// The core idea here is that if the result of a load is used in an operation,
633/// we should load the type most conducive to that operation. For example, when
634/// loading an integer and converting that immediately to a pointer, we should
635/// instead directly load a pointer.
636///
637/// However, this routine must never change the width of a load or the number of
638/// loads as that would introduce a semantic change. This combine is expected to
639/// be a semantic no-op which just allows loads to more closely model the types
640/// of their consuming operations.
641///
642/// Currently, we also refuse to change the precise type used for an atomic load
643/// or a volatile load. This is debatable, and might be reasonable to change
644/// later. However, it is risky in case some backend or other part of LLVM is
645/// relying on the exact type loaded to select appropriate atomic operations.
647 LoadInst &Load) {
648 // FIXME: We could probably with some care handle both volatile and ordered
649 // atomic loads here but it isn't clear that this is important.
650 if (!Load.isUnordered())
651 return nullptr;
652
653 if (Load.use_empty())
654 return nullptr;
655
656 // swifterror values can't be bitcasted.
657 if (Load.getPointerOperand()->isSwiftError())
658 return nullptr;
659
660 // Fold away bit casts of the loaded value by loading the desired type.
661 // Note that we should not do this for pointer<->integer casts,
662 // because that would result in type punning.
663 if (Load.hasOneUse()) {
664 // Don't transform when the type is x86_amx, it makes the pass that lower
665 // x86_amx type happy.
666 Type *LoadTy = Load.getType();
667 if (auto *BC = dyn_cast<BitCastInst>(Load.user_back())) {
668 assert(!LoadTy->isX86_AMXTy() && "Load from x86_amx* should not happen!");
669 if (BC->getType()->isX86_AMXTy())
670 return nullptr;
671 }
672
673 if (auto *CastUser = dyn_cast<CastInst>(Load.user_back())) {
674 Type *DestTy = CastUser->getDestTy();
675 if (CastUser->isNoopCast(IC.getDataLayout()) &&
676 LoadTy->isPtrOrPtrVectorTy() == DestTy->isPtrOrPtrVectorTy() &&
677 (!Load.isAtomic() || isSupportedAtomicType(DestTy))) {
678 LoadInst *NewLoad = IC.combineLoadToNewType(Load, DestTy);
679 CastUser->replaceAllUsesWith(NewLoad);
680 IC.eraseInstFromFunction(*CastUser);
681 return &Load;
682 }
683 }
684 }
685
686 // FIXME: We should also canonicalize loads of vectors when their elements are
687 // cast to other types.
688 return nullptr;
689}
690
692 // FIXME: We could probably with some care handle both volatile and atomic
693 // stores here but it isn't clear that this is important.
694 if (!LI.isSimple())
695 return nullptr;
696
697 Type *T = LI.getType();
698 if (!T->isAggregateType())
699 return nullptr;
700
701 StringRef Name = LI.getName();
702
703 if (auto *ST = dyn_cast<StructType>(T)) {
704 // If the struct only have one element, we unpack.
705 auto NumElements = ST->getNumElements();
706 if (NumElements == 1) {
707 LoadInst *NewLoad = IC.combineLoadToNewType(LI, ST->getTypeAtIndex(0U),
708 ".unpack");
709 NewLoad->setAAMetadata(LI.getAAMetadata());
711 PoisonValue::get(T), NewLoad, 0, Name));
712 }
713
714 // We don't want to break loads with padding here as we'd loose
715 // the knowledge that padding exists for the rest of the pipeline.
716 const DataLayout &DL = IC.getDataLayout();
717 auto *SL = DL.getStructLayout(ST);
718
719 // Don't unpack for structure with scalable vector.
720 if (SL->getSizeInBits().isScalable())
721 return nullptr;
722
723 if (SL->hasPadding())
724 return nullptr;
725
726 const auto Align = LI.getAlign();
727 auto *Addr = LI.getPointerOperand();
728 auto *IdxType = Type::getInt32Ty(T->getContext());
729 auto *Zero = ConstantInt::get(IdxType, 0);
730
732 for (unsigned i = 0; i < NumElements; i++) {
733 Value *Indices[2] = {
734 Zero,
735 ConstantInt::get(IdxType, i),
736 };
737 auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, ArrayRef(Indices),
738 Name + ".elt");
739 auto *L = IC.Builder.CreateAlignedLoad(
740 ST->getElementType(i), Ptr,
741 commonAlignment(Align, SL->getElementOffset(i)), Name + ".unpack");
742 // Propagate AA metadata. It'll still be valid on the narrowed load.
743 L->setAAMetadata(LI.getAAMetadata());
744 V = IC.Builder.CreateInsertValue(V, L, i);
745 }
746
747 V->setName(Name);
748 return IC.replaceInstUsesWith(LI, V);
749 }
750
751 if (auto *AT = dyn_cast<ArrayType>(T)) {
752 auto *ET = AT->getElementType();
753 auto NumElements = AT->getNumElements();
754 if (NumElements == 1) {
755 LoadInst *NewLoad = IC.combineLoadToNewType(LI, ET, ".unpack");
756 NewLoad->setAAMetadata(LI.getAAMetadata());
758 PoisonValue::get(T), NewLoad, 0, Name));
759 }
760
761 // Bail out if the array is too large. Ideally we would like to optimize
762 // arrays of arbitrary size but this has a terrible impact on compile time.
763 // The threshold here is chosen arbitrarily, maybe needs a little bit of
764 // tuning.
765 if (NumElements > IC.MaxArraySizeForCombine)
766 return nullptr;
767
768 const DataLayout &DL = IC.getDataLayout();
769 TypeSize EltSize = DL.getTypeAllocSize(ET);
770 const auto Align = LI.getAlign();
771
772 auto *Addr = LI.getPointerOperand();
773 auto *IdxType = Type::getInt64Ty(T->getContext());
774 auto *Zero = ConstantInt::get(IdxType, 0);
775
778 for (uint64_t i = 0; i < NumElements; i++) {
779 Value *Indices[2] = {
780 Zero,
781 ConstantInt::get(IdxType, i),
782 };
783 auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices),
784 Name + ".elt");
785 auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
786 auto *L = IC.Builder.CreateAlignedLoad(AT->getElementType(), Ptr,
787 EltAlign, Name + ".unpack");
788 L->setAAMetadata(LI.getAAMetadata());
789 V = IC.Builder.CreateInsertValue(V, L, i);
790 Offset += EltSize;
791 }
792
793 V->setName(Name);
794 return IC.replaceInstUsesWith(LI, V);
795 }
796
797 return nullptr;
798}
799
800// If we can determine that all possible objects pointed to by the provided
801// pointer value are, not only dereferenceable, but also definitively less than
802// or equal to the provided maximum size, then return true. Otherwise, return
803// false (constant global values and allocas fall into this category).
804//
805// FIXME: This should probably live in ValueTracking (or similar).
807 const DataLayout &DL) {
809 SmallVector<Value *, 4> Worklist(1, V);
810
811 do {
812 Value *P = Worklist.pop_back_val();
813 P = P->stripPointerCasts();
814
815 if (!Visited.insert(P).second)
816 continue;
817
818 if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
819 Worklist.push_back(SI->getTrueValue());
820 Worklist.push_back(SI->getFalseValue());
821 continue;
822 }
823
824 if (PHINode *PN = dyn_cast<PHINode>(P)) {
825 append_range(Worklist, PN->incoming_values());
826 continue;
827 }
828
829 if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
830 if (GA->isInterposable())
831 return false;
832 Worklist.push_back(GA->getAliasee());
833 continue;
834 }
835
836 // If we know how big this object is, and it is less than MaxSize, continue
837 // searching. Otherwise, return false.
838 if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
839 if (!AI->getAllocatedType()->isSized())
840 return false;
841
842 ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
843 if (!CS)
844 return false;
845
846 TypeSize TS = DL.getTypeAllocSize(AI->getAllocatedType());
847 if (TS.isScalable())
848 return false;
849 // Make sure that, even if the multiplication below would wrap as an
850 // uint64_t, we still do the right thing.
851 if ((CS->getValue().zext(128) * APInt(128, TS.getFixedValue()))
852 .ugt(MaxSize))
853 return false;
854 continue;
855 }
856
857 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
858 if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
859 return false;
860
861 uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
862 if (InitSize > MaxSize)
863 return false;
864 continue;
865 }
866
867 return false;
868 } while (!Worklist.empty());
869
870 return true;
871}
872
873// If we're indexing into an object of a known size, and the outer index is
874// not a constant, but having any value but zero would lead to undefined
875// behavior, replace it with zero.
876//
877// For example, if we have:
878// @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
879// ...
880// %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
881// ... = load i32* %arrayidx, align 4
882// Then we know that we can replace %x in the GEP with i64 0.
883//
884// FIXME: We could fold any GEP index to zero that would cause UB if it were
885// not zero. Currently, we only handle the first such index. Also, we could
886// also search through non-zero constant indices if we kept track of the
887// offsets those indices implied.
889 GetElementPtrInst *GEPI, Instruction *MemI,
890 unsigned &Idx) {
891 if (GEPI->getNumOperands() < 2)
892 return false;
893
894 // Find the first non-zero index of a GEP. If all indices are zero, return
895 // one past the last index.
896 auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
897 unsigned I = 1;
898 for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
899 Value *V = GEPI->getOperand(I);
900 if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
901 if (CI->isZero())
902 continue;
903
904 break;
905 }
906
907 return I;
908 };
909
910 // Skip through initial 'zero' indices, and find the corresponding pointer
911 // type. See if the next index is not a constant.
912 Idx = FirstNZIdx(GEPI);
913 if (Idx == GEPI->getNumOperands())
914 return false;
915 if (isa<Constant>(GEPI->getOperand(Idx)))
916 return false;
917
918 SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
919 Type *SourceElementType = GEPI->getSourceElementType();
920 // Size information about scalable vectors is not available, so we cannot
921 // deduce whether indexing at n is undefined behaviour or not. Bail out.
922 if (SourceElementType->isScalableTy())
923 return false;
924
925 Type *AllocTy = GetElementPtrInst::getIndexedType(SourceElementType, Ops);
926 if (!AllocTy || !AllocTy->isSized())
927 return false;
928 const DataLayout &DL = IC.getDataLayout();
929 uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy).getFixedValue();
930
931 // If there are more indices after the one we might replace with a zero, make
932 // sure they're all non-negative. If any of them are negative, the overall
933 // address being computed might be before the base address determined by the
934 // first non-zero index.
935 auto IsAllNonNegative = [&]() {
936 for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
937 KnownBits Known = IC.computeKnownBits(GEPI->getOperand(i), 0, MemI);
938 if (Known.isNonNegative())
939 continue;
940 return false;
941 }
942
943 return true;
944 };
945
946 // FIXME: If the GEP is not inbounds, and there are extra indices after the
947 // one we'll replace, those could cause the address computation to wrap
948 // (rendering the IsAllNonNegative() check below insufficient). We can do
949 // better, ignoring zero indices (and other indices we can prove small
950 // enough not to wrap).
951 if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
952 return false;
953
954 // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
955 // also known to be dereferenceable.
956 return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
957 IsAllNonNegative();
958}
959
960// If we're indexing into an object with a variable index for the memory
961// access, but the object has only one element, we can assume that the index
962// will always be zero. If we replace the GEP, return it.
964 Instruction &MemI) {
965 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
966 unsigned Idx;
967 if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
968 Instruction *NewGEPI = GEPI->clone();
969 NewGEPI->setOperand(Idx,
970 ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
971 IC.InsertNewInstBefore(NewGEPI, GEPI->getIterator());
972 return NewGEPI;
973 }
974 }
975
976 return nullptr;
977}
978
980 if (NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()))
981 return false;
982
983 auto *Ptr = SI.getPointerOperand();
984 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
985 Ptr = GEPI->getOperand(0);
986 return (isa<ConstantPointerNull>(Ptr) &&
987 !NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()));
988}
989
991 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
992 const Value *GEPI0 = GEPI->getOperand(0);
993 if (isa<ConstantPointerNull>(GEPI0) &&
994 !NullPointerIsDefined(LI.getFunction(), GEPI->getPointerAddressSpace()))
995 return true;
996 }
997 if (isa<UndefValue>(Op) ||
998 (isa<ConstantPointerNull>(Op) &&
1000 return true;
1001 return false;
1002}
1003
1005 Value *Op = LI.getOperand(0);
1007 return replaceInstUsesWith(LI, Res);
1008
1009 // Try to canonicalize the loaded type.
1010 if (Instruction *Res = combineLoadToOperationType(*this, LI))
1011 return Res;
1012
1014 // Attempt to improve the alignment.
1015 Align KnownAlign = getOrEnforceKnownAlignment(
1016 Op, DL.getPrefTypeAlign(LI.getType()), DL, &LI, &AC, &DT);
1017 if (KnownAlign > LI.getAlign())
1018 LI.setAlignment(KnownAlign);
1019 }
1020
1021 // Replace GEP indices if possible.
1022 if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI))
1023 return replaceOperand(LI, 0, NewGEPI);
1024
1025 if (Instruction *Res = unpackLoadToAggregate(*this, LI))
1026 return Res;
1027
1028 // Do really simple store-to-load forwarding and load CSE, to catch cases
1029 // where there are several consecutive memory accesses to the same location,
1030 // separated by a few arithmetic operations.
1031 bool IsLoadCSE = false;
1032 BatchAAResults BatchAA(*AA);
1033 if (Value *AvailableVal = FindAvailableLoadedValue(&LI, BatchAA, &IsLoadCSE)) {
1034 if (IsLoadCSE)
1035 combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
1036
1037 return replaceInstUsesWith(
1038 LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
1039 LI.getName() + ".cast"));
1040 }
1041
1042 // None of the following transforms are legal for volatile/ordered atomic
1043 // loads. Most of them do apply for unordered atomics.
1044 if (!LI.isUnordered()) return nullptr;
1045
1046 // load(gep null, ...) -> unreachable
1047 // load null/undef -> unreachable
1048 // TODO: Consider a target hook for valid address spaces for this xforms.
1051 return replaceInstUsesWith(LI, PoisonValue::get(LI.getType()));
1052 }
1053
1054 if (Op->hasOneUse()) {
1055 // Change select and PHI nodes to select values instead of addresses: this
1056 // helps alias analysis out a lot, allows many others simplifications, and
1057 // exposes redundancy in the code.
1058 //
1059 // Note that we cannot do the transformation unless we know that the
1060 // introduced loads cannot trap! Something like this is valid as long as
1061 // the condition is always false: load (select bool %C, int* null, int* %G),
1062 // but it would not be valid if we transformed it to load from null
1063 // unconditionally.
1064 //
1065 if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
1066 // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
1067 Align Alignment = LI.getAlign();
1068 if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(),
1069 Alignment, DL, SI) &&
1070 isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(),
1071 Alignment, DL, SI)) {
1072 LoadInst *V1 =
1073 Builder.CreateLoad(LI.getType(), SI->getOperand(1),
1074 SI->getOperand(1)->getName() + ".val");
1075 LoadInst *V2 =
1076 Builder.CreateLoad(LI.getType(), SI->getOperand(2),
1077 SI->getOperand(2)->getName() + ".val");
1078 assert(LI.isUnordered() && "implied by above");
1079 V1->setAlignment(Alignment);
1080 V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1081 V2->setAlignment(Alignment);
1082 V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1083 return SelectInst::Create(SI->getCondition(), V1, V2);
1084 }
1085
1086 // load (select (cond, null, P)) -> load P
1087 if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
1088 !NullPointerIsDefined(SI->getFunction(),
1089 LI.getPointerAddressSpace()))
1090 return replaceOperand(LI, 0, SI->getOperand(2));
1091
1092 // load (select (cond, P, null)) -> load P
1093 if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
1094 !NullPointerIsDefined(SI->getFunction(),
1095 LI.getPointerAddressSpace()))
1096 return replaceOperand(LI, 0, SI->getOperand(1));
1097 }
1098 }
1099 return nullptr;
1100}
1101
1102/// Look for extractelement/insertvalue sequence that acts like a bitcast.
1103///
1104/// \returns underlying value that was "cast", or nullptr otherwise.
1105///
1106/// For example, if we have:
1107///
1108/// %E0 = extractelement <2 x double> %U, i32 0
1109/// %V0 = insertvalue [2 x double] undef, double %E0, 0
1110/// %E1 = extractelement <2 x double> %U, i32 1
1111/// %V1 = insertvalue [2 x double] %V0, double %E1, 1
1112///
1113/// and the layout of a <2 x double> is isomorphic to a [2 x double],
1114/// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
1115/// Note that %U may contain non-undef values where %V1 has undef.
1117 Value *U = nullptr;
1118 while (auto *IV = dyn_cast<InsertValueInst>(V)) {
1119 auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
1120 if (!E)
1121 return nullptr;
1122 auto *W = E->getVectorOperand();
1123 if (!U)
1124 U = W;
1125 else if (U != W)
1126 return nullptr;
1127 auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
1128 if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
1129 return nullptr;
1130 V = IV->getAggregateOperand();
1131 }
1132 if (!match(V, m_Undef()) || !U)
1133 return nullptr;
1134
1135 auto *UT = cast<VectorType>(U->getType());
1136 auto *VT = V->getType();
1137 // Check that types UT and VT are bitwise isomorphic.
1138 const auto &DL = IC.getDataLayout();
1139 if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) {
1140 return nullptr;
1141 }
1142 if (auto *AT = dyn_cast<ArrayType>(VT)) {
1143 if (AT->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
1144 return nullptr;
1145 } else {
1146 auto *ST = cast<StructType>(VT);
1147 if (ST->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
1148 return nullptr;
1149 for (const auto *EltT : ST->elements()) {
1150 if (EltT != UT->getElementType())
1151 return nullptr;
1152 }
1153 }
1154 return U;
1155}
1156
1157/// Combine stores to match the type of value being stored.
1158///
1159/// The core idea here is that the memory does not have any intrinsic type and
1160/// where we can we should match the type of a store to the type of value being
1161/// stored.
1162///
1163/// However, this routine must never change the width of a store or the number of
1164/// stores as that would introduce a semantic change. This combine is expected to
1165/// be a semantic no-op which just allows stores to more closely model the types
1166/// of their incoming values.
1167///
1168/// Currently, we also refuse to change the precise type used for an atomic or
1169/// volatile store. This is debatable, and might be reasonable to change later.
1170/// However, it is risky in case some backend or other part of LLVM is relying
1171/// on the exact type stored to select appropriate atomic operations.
1172///
1173/// \returns true if the store was successfully combined away. This indicates
1174/// the caller must erase the store instruction. We have to let the caller erase
1175/// the store instruction as otherwise there is no way to signal whether it was
1176/// combined or not: IC.EraseInstFromFunction returns a null pointer.
1178 // FIXME: We could probably with some care handle both volatile and ordered
1179 // atomic stores here but it isn't clear that this is important.
1180 if (!SI.isUnordered())
1181 return false;
1182
1183 // swifterror values can't be bitcasted.
1184 if (SI.getPointerOperand()->isSwiftError())
1185 return false;
1186
1187 Value *V = SI.getValueOperand();
1188
1189 // Fold away bit casts of the stored value by storing the original type.
1190 if (auto *BC = dyn_cast<BitCastInst>(V)) {
1191 assert(!BC->getType()->isX86_AMXTy() &&
1192 "store to x86_amx* should not happen!");
1193 V = BC->getOperand(0);
1194 // Don't transform when the type is x86_amx, it makes the pass that lower
1195 // x86_amx type happy.
1196 if (V->getType()->isX86_AMXTy())
1197 return false;
1198 if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
1199 combineStoreToNewValue(IC, SI, V);
1200 return true;
1201 }
1202 }
1203
1204 if (Value *U = likeBitCastFromVector(IC, V))
1205 if (!SI.isAtomic() || isSupportedAtomicType(U->getType())) {
1206 combineStoreToNewValue(IC, SI, U);
1207 return true;
1208 }
1209
1210 // FIXME: We should also canonicalize stores of vectors when their elements
1211 // are cast to other types.
1212 return false;
1213}
1214
1216 // FIXME: We could probably with some care handle both volatile and atomic
1217 // stores here but it isn't clear that this is important.
1218 if (!SI.isSimple())
1219 return false;
1220
1221 Value *V = SI.getValueOperand();
1222 Type *T = V->getType();
1223
1224 if (!T->isAggregateType())
1225 return false;
1226
1227 if (auto *ST = dyn_cast<StructType>(T)) {
1228 // If the struct only have one element, we unpack.
1229 unsigned Count = ST->getNumElements();
1230 if (Count == 1) {
1231 V = IC.Builder.CreateExtractValue(V, 0);
1232 combineStoreToNewValue(IC, SI, V);
1233 return true;
1234 }
1235
1236 // We don't want to break loads with padding here as we'd loose
1237 // the knowledge that padding exists for the rest of the pipeline.
1238 const DataLayout &DL = IC.getDataLayout();
1239 auto *SL = DL.getStructLayout(ST);
1240
1241 // Don't unpack for structure with scalable vector.
1242 if (SL->getSizeInBits().isScalable())
1243 return false;
1244
1245 if (SL->hasPadding())
1246 return false;
1247
1248 const auto Align = SI.getAlign();
1249
1250 SmallString<16> EltName = V->getName();
1251 EltName += ".elt";
1252 auto *Addr = SI.getPointerOperand();
1253 SmallString<16> AddrName = Addr->getName();
1254 AddrName += ".repack";
1255
1256 auto *IdxType = Type::getInt32Ty(ST->getContext());
1257 auto *Zero = ConstantInt::get(IdxType, 0);
1258 for (unsigned i = 0; i < Count; i++) {
1259 Value *Indices[2] = {
1260 Zero,
1261 ConstantInt::get(IdxType, i),
1262 };
1263 auto *Ptr =
1264 IC.Builder.CreateInBoundsGEP(ST, Addr, ArrayRef(Indices), AddrName);
1265 auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1266 auto EltAlign = commonAlignment(Align, SL->getElementOffset(i));
1267 llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1268 NS->setAAMetadata(SI.getAAMetadata());
1269 }
1270
1271 return true;
1272 }
1273
1274 if (auto *AT = dyn_cast<ArrayType>(T)) {
1275 // If the array only have one element, we unpack.
1276 auto NumElements = AT->getNumElements();
1277 if (NumElements == 1) {
1278 V = IC.Builder.CreateExtractValue(V, 0);
1279 combineStoreToNewValue(IC, SI, V);
1280 return true;
1281 }
1282
1283 // Bail out if the array is too large. Ideally we would like to optimize
1284 // arrays of arbitrary size but this has a terrible impact on compile time.
1285 // The threshold here is chosen arbitrarily, maybe needs a little bit of
1286 // tuning.
1287 if (NumElements > IC.MaxArraySizeForCombine)
1288 return false;
1289
1290 const DataLayout &DL = IC.getDataLayout();
1291 TypeSize EltSize = DL.getTypeAllocSize(AT->getElementType());
1292 const auto Align = SI.getAlign();
1293
1294 SmallString<16> EltName = V->getName();
1295 EltName += ".elt";
1296 auto *Addr = SI.getPointerOperand();
1297 SmallString<16> AddrName = Addr->getName();
1298 AddrName += ".repack";
1299
1300 auto *IdxType = Type::getInt64Ty(T->getContext());
1301 auto *Zero = ConstantInt::get(IdxType, 0);
1302
1304 for (uint64_t i = 0; i < NumElements; i++) {
1305 Value *Indices[2] = {
1306 Zero,
1307 ConstantInt::get(IdxType, i),
1308 };
1309 auto *Ptr =
1310 IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices), AddrName);
1311 auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1312 auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
1313 Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1314 NS->setAAMetadata(SI.getAAMetadata());
1315 Offset += EltSize;
1316 }
1317
1318 return true;
1319 }
1320
1321 return false;
1322}
1323
1324/// equivalentAddressValues - Test if A and B will obviously have the same
1325/// value. This includes recognizing that %t0 and %t1 will have the same
1326/// value in code like this:
1327/// %t0 = getelementptr \@a, 0, 3
1328/// store i32 0, i32* %t0
1329/// %t1 = getelementptr \@a, 0, 3
1330/// %t2 = load i32* %t1
1331///
1333 // Test if the values are trivially equivalent.
1334 if (A == B) return true;
1335
1336 // Test if the values come form identical arithmetic instructions.
1337 // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
1338 // its only used to compare two uses within the same basic block, which
1339 // means that they'll always either have the same value or one of them
1340 // will have an undefined value.
1341 if (isa<BinaryOperator>(A) ||
1342 isa<CastInst>(A) ||
1343 isa<PHINode>(A) ||
1344 isa<GetElementPtrInst>(A))
1345 if (Instruction *BI = dyn_cast<Instruction>(B))
1346 if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
1347 return true;
1348
1349 // Otherwise they may not be equivalent.
1350 return false;
1351}
1352
1354 Value *Val = SI.getOperand(0);
1355 Value *Ptr = SI.getOperand(1);
1356
1357 // Try to canonicalize the stored type.
1358 if (combineStoreToValueType(*this, SI))
1359 return eraseInstFromFunction(SI);
1360
1362 // Attempt to improve the alignment.
1363 const Align KnownAlign = getOrEnforceKnownAlignment(
1364 Ptr, DL.getPrefTypeAlign(Val->getType()), DL, &SI, &AC, &DT);
1365 if (KnownAlign > SI.getAlign())
1366 SI.setAlignment(KnownAlign);
1367 }
1368
1369 // Try to canonicalize the stored type.
1370 if (unpackStoreToAggregate(*this, SI))
1371 return eraseInstFromFunction(SI);
1372
1373 // Replace GEP indices if possible.
1374 if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI))
1375 return replaceOperand(SI, 1, NewGEPI);
1376
1377 // Don't hack volatile/ordered stores.
1378 // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
1379 if (!SI.isUnordered()) return nullptr;
1380
1381 // If the RHS is an alloca with a single use, zapify the store, making the
1382 // alloca dead.
1383 if (Ptr->hasOneUse()) {
1384 if (isa<AllocaInst>(Ptr))
1385 return eraseInstFromFunction(SI);
1386 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
1387 if (isa<AllocaInst>(GEP->getOperand(0))) {
1388 if (GEP->getOperand(0)->hasOneUse())
1389 return eraseInstFromFunction(SI);
1390 }
1391 }
1392 }
1393
1394 // If we have a store to a location which is known constant, we can conclude
1395 // that the store must be storing the constant value (else the memory
1396 // wouldn't be constant), and this must be a noop.
1398 return eraseInstFromFunction(SI);
1399
1400 // Do really simple DSE, to catch cases where there are several consecutive
1401 // stores to the same location, separated by a few arithmetic operations. This
1402 // situation often occurs with bitfield accesses.
1403 BasicBlock::iterator BBI(SI);
1404 for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
1405 --ScanInsts) {
1406 --BBI;
1407 // Don't count debug info directives, lest they affect codegen,
1408 // and we skip pointer-to-pointer bitcasts, which are NOPs.
1409 if (BBI->isDebugOrPseudoInst()) {
1410 ScanInsts++;
1411 continue;
1412 }
1413
1414 if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
1415 // Prev store isn't volatile, and stores to the same location?
1416 if (PrevSI->isUnordered() &&
1417 equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1)) &&
1418 PrevSI->getValueOperand()->getType() ==
1419 SI.getValueOperand()->getType()) {
1420 ++NumDeadStore;
1421 // Manually add back the original store to the worklist now, so it will
1422 // be processed after the operands of the removed store, as this may
1423 // expose additional DSE opportunities.
1424 Worklist.push(&SI);
1425 eraseInstFromFunction(*PrevSI);
1426 return nullptr;
1427 }
1428 break;
1429 }
1430
1431 // If this is a load, we have to stop. However, if the loaded value is from
1432 // the pointer we're loading and is producing the pointer we're storing,
1433 // then *this* store is dead (X = load P; store X -> P).
1434 if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
1435 if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
1436 assert(SI.isUnordered() && "can't eliminate ordering operation");
1437 return eraseInstFromFunction(SI);
1438 }
1439
1440 // Otherwise, this is a load from some other location. Stores before it
1441 // may not be dead.
1442 break;
1443 }
1444
1445 // Don't skip over loads, throws or things that can modify memory.
1446 if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory() || BBI->mayThrow())
1447 break;
1448 }
1449
1450 // store X, null -> turns into 'unreachable' in SimplifyCFG
1451 // store X, GEP(null, Y) -> turns into 'unreachable' in SimplifyCFG
1452 if (canSimplifyNullStoreOrGEP(SI)) {
1453 if (!isa<PoisonValue>(Val))
1454 return replaceOperand(SI, 0, PoisonValue::get(Val->getType()));
1455 return nullptr; // Do not modify these!
1456 }
1457
1458 // This is a non-terminator unreachable marker. Don't remove it.
1459 if (isa<UndefValue>(Ptr)) {
1460 // Remove guaranteed-to-transfer instructions before the marker.
1462 return &SI;
1463
1464 // Remove all instructions after the marker and handle dead blocks this
1465 // implies.
1467 handleUnreachableFrom(SI.getNextNode(), Worklist);
1469 return nullptr;
1470 }
1471
1472 // store undef, Ptr -> noop
1473 // FIXME: This is technically incorrect because it might overwrite a poison
1474 // value. Change to PoisonValue once #52930 is resolved.
1475 if (isa<UndefValue>(Val))
1476 return eraseInstFromFunction(SI);
1477
1478 return nullptr;
1479}
1480
1481/// Try to transform:
1482/// if () { *P = v1; } else { *P = v2 }
1483/// or:
1484/// *P = v1; if () { *P = v2; }
1485/// into a phi node with a store in the successor.
1487 if (!SI.isUnordered())
1488 return false; // This code has not been audited for volatile/ordered case.
1489
1490 // Check if the successor block has exactly 2 incoming edges.
1491 BasicBlock *StoreBB = SI.getParent();
1492 BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
1493 if (!DestBB->hasNPredecessors(2))
1494 return false;
1495
1496 // Capture the other block (the block that doesn't contain our store).
1497 pred_iterator PredIter = pred_begin(DestBB);
1498 if (*PredIter == StoreBB)
1499 ++PredIter;
1500 BasicBlock *OtherBB = *PredIter;
1501
1502 // Bail out if all of the relevant blocks aren't distinct. This can happen,
1503 // for example, if SI is in an infinite loop.
1504 if (StoreBB == DestBB || OtherBB == DestBB)
1505 return false;
1506
1507 // Verify that the other block ends in a branch and is not otherwise empty.
1508 BasicBlock::iterator BBI(OtherBB->getTerminator());
1509 BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
1510 if (!OtherBr || BBI == OtherBB->begin())
1511 return false;
1512
1513 auto OtherStoreIsMergeable = [&](StoreInst *OtherStore) -> bool {
1514 if (!OtherStore ||
1515 OtherStore->getPointerOperand() != SI.getPointerOperand())
1516 return false;
1517
1518 auto *SIVTy = SI.getValueOperand()->getType();
1519 auto *OSVTy = OtherStore->getValueOperand()->getType();
1520 return CastInst::isBitOrNoopPointerCastable(OSVTy, SIVTy, DL) &&
1521 SI.hasSameSpecialState(OtherStore);
1522 };
1523
1524 // If the other block ends in an unconditional branch, check for the 'if then
1525 // else' case. There is an instruction before the branch.
1526 StoreInst *OtherStore = nullptr;
1527 if (OtherBr->isUnconditional()) {
1528 --BBI;
1529 // Skip over debugging info and pseudo probes.
1530 while (BBI->isDebugOrPseudoInst()) {
1531 if (BBI==OtherBB->begin())
1532 return false;
1533 --BBI;
1534 }
1535 // If this isn't a store, isn't a store to the same location, or is not the
1536 // right kind of store, bail out.
1537 OtherStore = dyn_cast<StoreInst>(BBI);
1538 if (!OtherStoreIsMergeable(OtherStore))
1539 return false;
1540 } else {
1541 // Otherwise, the other block ended with a conditional branch. If one of the
1542 // destinations is StoreBB, then we have the if/then case.
1543 if (OtherBr->getSuccessor(0) != StoreBB &&
1544 OtherBr->getSuccessor(1) != StoreBB)
1545 return false;
1546
1547 // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
1548 // if/then triangle. See if there is a store to the same ptr as SI that
1549 // lives in OtherBB.
1550 for (;; --BBI) {
1551 // Check to see if we find the matching store.
1552 OtherStore = dyn_cast<StoreInst>(BBI);
1553 if (OtherStoreIsMergeable(OtherStore))
1554 break;
1555
1556 // If we find something that may be using or overwriting the stored
1557 // value, or if we run out of instructions, we can't do the transform.
1558 if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
1559 BBI->mayWriteToMemory() || BBI == OtherBB->begin())
1560 return false;
1561 }
1562
1563 // In order to eliminate the store in OtherBr, we have to make sure nothing
1564 // reads or overwrites the stored value in StoreBB.
1565 for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
1566 // FIXME: This should really be AA driven.
1567 if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory())
1568 return false;
1569 }
1570 }
1571
1572 // Insert a PHI node now if we need it.
1573 Value *MergedVal = OtherStore->getValueOperand();
1574 // The debug locations of the original instructions might differ. Merge them.
1575 DebugLoc MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(),
1576 OtherStore->getDebugLoc());
1577 if (MergedVal != SI.getValueOperand()) {
1578 PHINode *PN =
1579 PHINode::Create(SI.getValueOperand()->getType(), 2, "storemerge");
1580 PN->addIncoming(SI.getValueOperand(), SI.getParent());
1581 Builder.SetInsertPoint(OtherStore);
1582 PN->addIncoming(Builder.CreateBitOrPointerCast(MergedVal, PN->getType()),
1583 OtherBB);
1584 MergedVal = InsertNewInstBefore(PN, DestBB->begin());
1585 PN->setDebugLoc(MergedLoc);
1586 }
1587
1588 // Advance to a place where it is safe to insert the new store and insert it.
1589 BBI = DestBB->getFirstInsertionPt();
1590 StoreInst *NewSI =
1591 new StoreInst(MergedVal, SI.getOperand(1), SI.isVolatile(), SI.getAlign(),
1592 SI.getOrdering(), SI.getSyncScopeID());
1593 InsertNewInstBefore(NewSI, BBI);
1594 NewSI->setDebugLoc(MergedLoc);
1595 NewSI->mergeDIAssignID({&SI, OtherStore});
1596
1597 // If the two stores had AA tags, merge them.
1598 AAMDNodes AATags = SI.getAAMetadata();
1599 if (AATags)
1600 NewSI->setAAMetadata(AATags.merge(OtherStore->getAAMetadata()));
1601
1602 // Nuke the old stores.
1604 eraseInstFromFunction(*OtherStore);
1605 return true;
1606}
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
std::string Name
Hexagon Common GEP
IRTranslator LLVM IR MI
This file provides internal interfaces used to implement the InstCombine.
static StoreInst * combineStoreToNewValue(InstCombinerImpl &IC, StoreInst &SI, Value *V)
Combine a store to a new type.
static Instruction * combineLoadToOperationType(InstCombinerImpl &IC, LoadInst &Load)
Combine loads to match the type of their uses' value after looking through intervening bitcasts.
static Instruction * replaceGEPIdxWithZero(InstCombinerImpl &IC, Value *Ptr, Instruction &MemI)
static Instruction * simplifyAllocaArraySize(InstCombinerImpl &IC, AllocaInst &AI, DominatorTree &DT)
static bool canSimplifyNullStoreOrGEP(StoreInst &SI)
static bool equivalentAddressValues(Value *A, Value *B)
equivalentAddressValues - Test if A and B will obviously have the same value.
static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC, GetElementPtrInst *GEPI, Instruction *MemI, unsigned &Idx)
static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op)
static bool isSupportedAtomicType(Type *Ty)
static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI, const DataLayout &DL)
Returns true if V is dereferenceable for size of alloca.
static Instruction * unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI)
static cl::opt< unsigned > MaxCopiedFromConstantUsers("instcombine-max-copied-from-constant-users", cl::init(300), cl::desc("Maximum users to visit in copy from constant transform"), cl::Hidden)
static bool combineStoreToValueType(InstCombinerImpl &IC, StoreInst &SI)
Combine stores to match the type of value being stored.
static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI)
static Value * likeBitCastFromVector(InstCombinerImpl &IC, Value *V)
Look for extractelement/insertvalue sequence that acts like a bitcast.
static bool isOnlyCopiedFromConstantMemory(AAResults *AA, AllocaInst *V, MemTransferInst *&TheCopy, SmallVectorImpl< Instruction * > &ToDelete)
isOnlyCopiedFromConstantMemory - Recursively walk the uses of a (derived) pointer to an alloca.
static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize, const DataLayout &DL)
This file provides the interface for the instcombine pass implementation.
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
#define P(N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines generic set operations that may be used on set's of different types,...
This file defines the SmallString class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, bool IgnoreLocals=false)
Returns a bitmask that should be unconditionally applied to the ModRef info of a memory location.
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Definition: Instructions.h:61
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:122
PointerType * getType() const
Overload to return most specific pointer type.
Definition: Instructions.h:97
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:115
bool isUsedWithInAlloca() const
Return true if this alloca is used as an inalloca argument to a call.
Definition: Instructions.h:137
unsigned getAddressSpace() const
Return the address space for the allocation.
Definition: Instructions.h:102
bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1.
void setAlignment(Align Align)
Definition: Instructions.h:126
const Value * getArraySize() const
Get the number of elements allocated.
Definition: Instructions.h:93
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:481
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:386
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
Conditional or Unconditional Branch instruction.
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:146
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
static DILocation * getMergedLocation(DILocation *LocA, DILocation *LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
IntegerType * getIndexType(LLVMContext &C, unsigned AddressSpace) const
Returns the type of a GEP index in AddressSpace.
Definition: DataLayout.cpp:899
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:868
A debug info location.
Definition: DebugLoc.h:33
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:915
bool isInBounds() const
Determine whether the GEP has the inbounds flag.
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Definition: Instructions.h:938
static Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
Type * getSourceElementType() const
Definition: Instructions.h:971
AllocaInst * CreateAlloca(Type *Ty, unsigned AddrSpace, Value *ArraySize=nullptr, const Twine &Name="")
Definition: IRBuilder.h:1778
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2531
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1812
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2524
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition: IRBuilder.h:1879
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:483
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2213
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1795
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2204
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:177
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1831
void handleUnreachableFrom(Instruction *I, SmallVectorImpl< BasicBlock * > &Worklist)
Instruction * visitLoadInst(LoadInst &LI)
void handlePotentiallyDeadBlocks(SmallVectorImpl< BasicBlock * > &Worklist)
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * visitStoreInst(StoreInst &SI)
bool mergeStoreIntoSuccessor(StoreInst &SI)
Try to transform: if () { *P = v1; } else { *P = v2 } or: *P = v1; if () { *P = v2; } into a phi node...
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the C...
bool removeInstructionsBeforeUnreachable(Instruction &I)
LoadInst * combineLoadToNewType(LoadInst &LI, Type *NewTy, const Twine &Suffix="")
Helper to combine a load to a new type.
Instruction * visitAllocSite(Instruction &FI)
Instruction * visitAllocaInst(AllocaInst &AI)
SimplifyQuery SQ
Definition: InstCombiner.h:76
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:341
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Definition: InstCombiner.h:366
AAResults * AA
Definition: InstCombiner.h:69
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Definition: InstCombiner.h:386
uint64_t MaxArraySizeForCombine
Maximum size of array considered when transforming.
Definition: InstCombiner.h:55
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Definition: InstCombiner.h:64
const DataLayout & DL
Definition: InstCombiner.h:75
AssumptionCache & AC
Definition: InstCombiner.h:72
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
Definition: InstCombiner.h:410
DominatorTree & DT
Definition: InstCombiner.h:74
void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, const Instruction *CxtI) const
Definition: InstCombiner.h:431
BuilderTy & Builder
Definition: InstCombiner.h:60
void push(Instruction *I)
Push the instruction onto the worklist stack.
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
void mergeDIAssignID(ArrayRef< const Instruction * > SourceInstructions)
Merge the DIAssignID metadata from this instruction and those attached to instructions in SourceInstr...
Definition: DebugInfo.cpp:936
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:466
void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
Definition: Metadata.cpp:1727
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:70
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1642
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1713
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:463
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
An instruction for reading from memory.
Definition: Instructions.h:174
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:259
void setAlignment(Align Align)
Definition: Instructions.h:213
Value * getPointerOperand()
Definition: Instructions.h:253
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Definition: Instructions.h:239
bool isSimple() const
Definition: Instructions.h:245
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:209
Metadata node.
Definition: Metadata.h:1069
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
This class wraps the llvm.memcpy/memmove intrinsics.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
PointerIntPair - This class implements a pair of a pointer and small integer.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1852
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, Instruction *MDFrom=nullptr)
size_type size() const
Definition: SmallPtrSet.h:96
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:503
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
bool empty() const
Definition: SmallVector.h:95
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:587
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:951
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
An instruction for storing to memory.
Definition: Instructions.h:290
Value * getValueOperand()
Definition: Instructions.h:374
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
Definition: Instructions.h:360
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getZero()
Definition: TypeSize.h:351
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:298
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:258
bool isX86_AMXTy() const
Return true if this is X86 AMX.
Definition: Type.h:200
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:239
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:224
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, Align Alignment, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Returns true if V is always a dereferenceable pointer with alignment greater or equal than requested.
Definition: Loads.cpp:201
void copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source)
Copy the metadata from the source instruction to the destination (the replacement for the source inst...
Definition: Local.cpp:3367
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2073
Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, BatchAAResults *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition: Loads.cpp:479
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
pred_iterator pred_begin(BasicBlock *BB)
Definition: CFG.h:110
Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
Definition: Local.cpp:1541
bool isModSet(const ModRefInfo MRI)
Definition: ModRef.h:48
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:2132
bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
Definition: Loads.cpp:372
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition: Local.cpp:2715
Value * simplifyLoadInst(LoadInst *LI, Value *PtrOp, const SimplifyQuery &Q)
Given a load instruction and its pointer operand, fold the result or return null.
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:3345
cl::opt< bool > EnableInferAlignmentPass
void replace(Container &Cont, typename Container::iterator ContIt, typename Container::iterator ContEnd, RandomAccessIterator ValIt, RandomAccessIterator ValEnd)
Given a sequence container Cont, replace the range [ContIt, ContEnd) with the range [ValIt,...
Definition: STLExtras.h:2088
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
#define N
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:97
SimplifyQuery getWithInstruction(const Instruction *I) const