1//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visit functions for load, store and alloca.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/MapVector.h"
16#include "llvm/ADT/Statistic.h"
18#include "llvm/Analysis/Loads.h"
19#include "llvm/IR/DataLayout.h"
22#include "llvm/IR/LLVMContext.h"
26using namespace llvm;
27using namespace PatternMatch;
28
29#define DEBUG_TYPE "instcombine"
30
31STATISTIC(NumDeadStore, "Number of dead stores eliminated");
32STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
33
35 "instcombine-max-copied-from-constant-users", cl::init(300),
36 cl::desc("Maximum users to visit in copy from constant transform"),
38
40 "enable-infer-alignment-pass", cl::init(true), cl::Hidden, cl::ZeroOrMore,
41 cl::desc("Enable the InferAlignment pass, disabling alignment inference in "
42 "InstCombine"));
43
44/// isOnlyCopiedFromConstantMemory - Recursively walk the uses of a (derived)
45/// pointer to an alloca. Ignore any reads of the pointer, return false if we
46/// see any stores or other unknown uses. If we see pointer arithmetic, keep
47/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
48/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
49/// the alloca, and if the source pointer is a pointer to a constant memory
50/// location, we can optimize this.
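///
/// Illustrative example (hypothetical IR, not taken from this file):
///   %buf = alloca [8 x i32]
///   call void @llvm.memcpy.p0.p0.i64(ptr %buf, ptr @const_data, i64 32, i1 false)
///   %v = load i32, ptr %buf
/// Assuming @const_data is a constant global, the walk below sees only a
/// simple load and a memcpy targeting the unoffseted alloca, so it records
/// the memcpy in TheCopy and returns true.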
51static bool
53 MemTransferInst *&TheCopy,
55 // We track lifetime intrinsics as we encounter them. If we decide to go
56 // ahead and replace the value with the memory location, this lets the caller
57 // quickly eliminate the markers.
58
59 using ValueAndIsOffset = PointerIntPair<Value *, 1, bool>;
62 Worklist.emplace_back(V, false);
63 while (!Worklist.empty()) {
64 ValueAndIsOffset Elem = Worklist.pop_back_val();
65 if (!Visited.insert(Elem).second)
66 continue;
67 if (Visited.size() > MaxCopiedFromConstantUsers)
68 return false;
69
70 const auto [Value, IsOffset] = Elem;
71 for (auto &U : Value->uses()) {
72 auto *I = cast<Instruction>(U.getUser());
73
74 if (auto *LI = dyn_cast<LoadInst>(I)) {
75 // Ignore non-volatile loads, they are always ok.
76 if (!LI->isSimple()) return false;
77 continue;
78 }
79
80 if (isa<PHINode, SelectInst>(I)) {
81 // We set IsOffset=true, to forbid the memcpy from occurring after the
82 // phi: If one of the phi operands is not based on the alloca, we
83 // would incorrectly omit a write.
84 Worklist.emplace_back(I, true);
85 continue;
86 }
87 if (isa<BitCastInst, AddrSpaceCastInst>(I)) {
88 // If uses of the bitcast are ok, we are ok.
89 Worklist.emplace_back(I, IsOffset);
90 continue;
91 }
92 if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
93 // If the GEP has all zero indices, it doesn't offset the pointer. If it
94 // doesn't, it does.
95 Worklist.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices());
96 continue;
97 }
98
99 if (auto *Call = dyn_cast<CallBase>(I)) {
100 // If this is the function being called then we treat it like a load and
101 // ignore it.
102 if (Call->isCallee(&U))
103 continue;
104
105 unsigned DataOpNo = Call->getDataOperandNo(&U);
106 bool IsArgOperand = Call->isArgOperand(&U);
107
108 // Inalloca arguments are clobbered by the call.
109 if (IsArgOperand && Call->isInAllocaArgument(DataOpNo))
110 return false;
111
112 // If this call site doesn't modify the memory, then we know it is just
113 // a load (but one that potentially returns the value itself), so we can
114 // ignore it if we know that the value isn't captured.
115 bool NoCapture = Call->doesNotCapture(DataOpNo);
116 if ((Call->onlyReadsMemory() && (Call->use_empty() || NoCapture)) ||
117 (Call->onlyReadsMemory(DataOpNo) && NoCapture))
118 continue;
119
120 // If this is being passed as a byval argument, the caller is making a
121 // copy, so it is only a read of the alloca.
122 if (IsArgOperand && Call->isByValArgument(DataOpNo))
123 continue;
124 }
125
126 // Lifetime intrinsics can be handled by the caller.
127 if (I->isLifetimeStartOrEnd()) {
128 assert(I->use_empty() && "Lifetime markers have no result to use!");
129 ToDelete.push_back(I);
130 continue;
131 }
132
133 // If this isn't our memcpy/memmove, reject it as something we can't
134 // handle.
135 MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
136 if (!MI)
137 return false;
138
139 // If the transfer is volatile, reject it.
140 if (MI->isVolatile())
141 return false;
142
143 // If the transfer is using the alloca as its source, then ignore it since
144 // it is just a load of the alloca (volatile transfers were rejected above).
145 if (U.getOperandNo() == 1)
146 continue;
147
148 // If we already have seen a copy, reject the second one.
149 if (TheCopy) return false;
150
151 // If the pointer has been offset from the start of the alloca, we can't
152 // safely handle this.
153 if (IsOffset) return false;
154
155 // If the memintrinsic isn't using the alloca as the dest, reject it.
156 if (U.getOperandNo() != 0) return false;
157
158 // If the source of the memcpy/move is not constant, reject it.
159 if (isModSet(AA->getModRefInfoMask(MI->getSource())))
160 return false;
161
162 // Otherwise, the transform is safe. Remember the copy instruction.
163 TheCopy = MI;
164 }
165 }
166 return true;
167}
168
169/// isOnlyCopiedFromConstantMemory - Return true if the specified alloca is only
170/// modified by a copy from a constant memory location. If we can prove this, we
171/// can replace any uses of the alloca with uses of the memory location
172/// directly.
173static MemTransferInst *
174isOnlyCopiedFromConstantMemory(AAResults *AA,
175 AllocaInst *AI,
176 SmallVectorImpl<Instruction *> &ToDelete) {
177 MemTransferInst *TheCopy = nullptr;
178 if (isOnlyCopiedFromConstantMemory(AA, AI, TheCopy, ToDelete))
179 return TheCopy;
180 return nullptr;
181}
182
183/// Returns true if V is dereferenceable for the size of the alloca.
184static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
185 const DataLayout &DL) {
186 if (AI->isArrayAllocation())
187 return false;
188 uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
189 if (!AllocaSize)
190 return false;
192 APInt(64, AllocaSize), DL);
193}
194
196 AllocaInst &AI, DominatorTree &DT) {
197 // Check for array size of 1 (scalar allocation).
198 if (!AI.isArrayAllocation()) {
199 // i32 1 is the canonical array size for scalar allocations.
200 if (AI.getArraySize()->getType()->isIntegerTy(32))
201 return nullptr;
202
203 // Canonicalize it.
204 return IC.replaceOperand(AI, 0, IC.Builder.getInt32(1));
205 }
206
207 // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
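  // Illustrative example (hypothetical IR): roughly,
  //   %p = alloca i32, i32 4
  // becomes
  //   %p1 = alloca [4 x i32]
  //   %p1.sub = getelementptr inbounds [4 x i32], ptr %p1, i64 0, i64 0
  // and all uses of %p are rewritten to use %p1.sub.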
208 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
209 if (C->getValue().getActiveBits() <= 64) {
210 Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
211 AllocaInst *New = IC.Builder.CreateAlloca(NewTy, AI.getAddressSpace(),
212 nullptr, AI.getName());
213 New->setAlignment(AI.getAlign());
214
215 replaceAllDbgUsesWith(AI, *New, *New, DT);
216
217 // Scan to the end of the allocation instructions, to skip over a block of
218 // allocas if possible...also skip interleaved debug info
219 //
220 BasicBlock::iterator It(New);
221 while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
222 ++It;
223
224 // Now that It is pointing to the first non-allocation-inst in the block,
225 // insert our getelementptr instruction...
226 //
227 Type *IdxTy = IC.getDataLayout().getIndexType(AI.getType());
228 Value *NullIdx = Constant::getNullValue(IdxTy);
229 Value *Idx[2] = {NullIdx, NullIdx};
231 NewTy, New, Idx, New->getName() + ".sub");
232 IC.InsertNewInstBefore(GEP, It);
233
234 // Now make everything use the getelementptr instead of the original
235 // allocation.
236 return IC.replaceInstUsesWith(AI, GEP);
237 }
238 }
239
240 if (isa<UndefValue>(AI.getArraySize()))
242
243 // Ensure that the alloca array size argument has type equal to the offset
244 // size of the alloca() pointer, which, in the typical case, is intptr_t,
245 // so that any casting is exposed early.
246 Type *PtrIdxTy = IC.getDataLayout().getIndexType(AI.getType());
247 if (AI.getArraySize()->getType() != PtrIdxTy) {
248 Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), PtrIdxTy, false);
249 return IC.replaceOperand(AI, 0, V);
250 }
251
252 return nullptr;
253}
254
255namespace {
256// If I and V are pointers in different address spaces, it is not allowed to
257// use replaceAllUsesWith since I and V have different types. A
258// non-target-specific transformation should not use addrspacecast on V since
259// the two address spaces may be disjoint depending on the target.
260//
261// This class chases down uses of the old pointer until reaching the load
262// instructions, then replaces the old pointer in the load instructions with
263// the new pointer. If during the chasing it sees bitcast or GEP, it will
264// create new bitcast or GEP with the new pointer and use them in the load
265// instruction.
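//
// Illustrative example (hypothetical IR and address spaces): if an
// addrspace(5) alloca is only initialized by a memcpy from an addrspace(4)
// constant global, a use such as
//   %v = load i32, ptr addrspace(5) %gep
// is recreated as a load through an equivalent GEP on the addrspace(4)
// pointer instead of addrspacecast'ing the new pointer.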
266class PointerReplacer {
267public:
268 PointerReplacer(InstCombinerImpl &IC, Instruction &Root, unsigned SrcAS)
269 : IC(IC), Root(Root), FromAS(SrcAS) {}
270
271 bool collectUsers();
272 void replacePointer(Value *V);
273
274private:
275 bool collectUsersRecursive(Instruction &I);
276 void replace(Instruction *I);
277 Value *getReplacement(Value *I);
278 bool isAvailable(Instruction *I) const {
279 return I == &Root || Worklist.contains(I);
280 }
281
282 bool isEqualOrValidAddrSpaceCast(const Instruction *I,
283 unsigned FromAS) const {
284 const auto *ASC = dyn_cast<AddrSpaceCastInst>(I);
285 if (!ASC)
286 return false;
287 unsigned ToAS = ASC->getDestAddressSpace();
288 return (FromAS == ToAS) || IC.isValidAddrSpaceCast(FromAS, ToAS);
289 }
290
291 SmallPtrSet<Instruction *, 32> ValuesToRevisit;
295 Instruction &Root;
296 unsigned FromAS;
297};
298} // end anonymous namespace
299
300bool PointerReplacer::collectUsers() {
301 if (!collectUsersRecursive(Root))
302 return false;
303
304 // Ensure that all outstanding (indirect) users of I
305 // are inserted into the Worklist. Return false
306 // otherwise.
307 for (auto *Inst : ValuesToRevisit)
308 if (!Worklist.contains(Inst))
309 return false;
310 return true;
311}
312
313bool PointerReplacer::collectUsersRecursive(Instruction &I) {
314 for (auto *U : I.users()) {
315 auto *Inst = cast<Instruction>(&*U);
316 if (auto *Load = dyn_cast<LoadInst>(Inst)) {
317 if (Load->isVolatile())
318 return false;
319 Worklist.insert(Load);
320 } else if (auto *PHI = dyn_cast<PHINode>(Inst)) {
321 // All incoming values must be instructions for replaceability
322 if (any_of(PHI->incoming_values(),
323 [](Value *V) { return !isa<Instruction>(V); }))
324 return false;
325
326 // If at least one incoming value of the PHI is not in Worklist,
327 // store the PHI for revisiting and skip this iteration of the
328 // loop.
329 if (any_of(PHI->incoming_values(), [this](Value *V) {
330 return !isAvailable(cast<Instruction>(V));
331 })) {
332 ValuesToRevisit.insert(Inst);
333 continue;
334 }
335
336 Worklist.insert(PHI);
337 if (!collectUsersRecursive(*PHI))
338 return false;
339 } else if (auto *SI = dyn_cast<SelectInst>(Inst)) {
340 if (!isa<Instruction>(SI->getTrueValue()) ||
341 !isa<Instruction>(SI->getFalseValue()))
342 return false;
343
344 if (!isAvailable(cast<Instruction>(SI->getTrueValue())) ||
345 !isAvailable(cast<Instruction>(SI->getFalseValue()))) {
346 ValuesToRevisit.insert(Inst);
347 continue;
348 }
349 Worklist.insert(SI);
350 if (!collectUsersRecursive(*SI))
351 return false;
352 } else if (isa<GetElementPtrInst, BitCastInst>(Inst)) {
353 Worklist.insert(Inst);
354 if (!collectUsersRecursive(*Inst))
355 return false;
356 } else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) {
357 if (MI->isVolatile())
358 return false;
359 Worklist.insert(Inst);
360 } else if (isEqualOrValidAddrSpaceCast(Inst, FromAS)) {
361 Worklist.insert(Inst);
362 } else if (Inst->isLifetimeStartOrEnd()) {
363 continue;
364 } else {
365 LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *U << '\n');
366 return false;
367 }
368 }
369
370 return true;
371}
372
373Value *PointerReplacer::getReplacement(Value *V) { return WorkMap.lookup(V); }
374
375void PointerReplacer::replace(Instruction *I) {
376 if (getReplacement(I))
377 return;
378
379 if (auto *LT = dyn_cast<LoadInst>(I)) {
380 auto *V = getReplacement(LT->getPointerOperand());
381 assert(V && "Operand not replaced");
382 auto *NewI = new LoadInst(LT->getType(), V, "", LT->isVolatile(),
383 LT->getAlign(), LT->getOrdering(),
384 LT->getSyncScopeID());
385 NewI->takeName(LT);
386 copyMetadataForLoad(*NewI, *LT);
387
388 IC.InsertNewInstWith(NewI, LT->getIterator());
389 IC.replaceInstUsesWith(*LT, NewI);
390 WorkMap[LT] = NewI;
391 } else if (auto *PHI = dyn_cast<PHINode>(I)) {
392 Type *NewTy = getReplacement(PHI->getIncomingValue(0))->getType();
393 auto *NewPHI = PHINode::Create(NewTy, PHI->getNumIncomingValues(),
394 PHI->getName(), PHI);
395 for (unsigned int I = 0; I < PHI->getNumIncomingValues(); ++I)
396 NewPHI->addIncoming(getReplacement(PHI->getIncomingValue(I)),
397 PHI->getIncomingBlock(I));
398 WorkMap[PHI] = NewPHI;
399 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
400 auto *V = getReplacement(GEP->getPointerOperand());
401 assert(V && "Operand not replaced");
403 Indices.append(GEP->idx_begin(), GEP->idx_end());
404 auto *NewI =
405 GetElementPtrInst::Create(GEP->getSourceElementType(), V, Indices);
406 IC.InsertNewInstWith(NewI, GEP->getIterator());
407 NewI->takeName(GEP);
408 WorkMap[GEP] = NewI;
409 } else if (auto *BC = dyn_cast<BitCastInst>(I)) {
410 auto *V = getReplacement(BC->getOperand(0));
411 assert(V && "Operand not replaced");
412 auto *NewT = PointerType::get(BC->getType()->getContext(),
413 V->getType()->getPointerAddressSpace());
414 auto *NewI = new BitCastInst(V, NewT);
415 IC.InsertNewInstWith(NewI, BC->getIterator());
416 NewI->takeName(BC);
417 WorkMap[BC] = NewI;
418 } else if (auto *SI = dyn_cast<SelectInst>(I)) {
419 auto *NewSI = SelectInst::Create(
420 SI->getCondition(), getReplacement(SI->getTrueValue()),
421 getReplacement(SI->getFalseValue()), SI->getName(), nullptr, SI);
422 IC.InsertNewInstWith(NewSI, SI->getIterator());
423 NewSI->takeName(SI);
424 WorkMap[SI] = NewSI;
425 } else if (auto *MemCpy = dyn_cast<MemTransferInst>(I)) {
426 auto *SrcV = getReplacement(MemCpy->getRawSource());
427 // The pointer may appear in the destination of a copy, but we don't want to
428 // replace it.
429 if (!SrcV) {
430 assert(getReplacement(MemCpy->getRawDest()) &&
431 "destination not in replace list");
432 return;
433 }
434
435 IC.Builder.SetInsertPoint(MemCpy);
436 auto *NewI = IC.Builder.CreateMemTransferInst(
437 MemCpy->getIntrinsicID(), MemCpy->getRawDest(), MemCpy->getDestAlign(),
438 SrcV, MemCpy->getSourceAlign(), MemCpy->getLength(),
439 MemCpy->isVolatile());
440 AAMDNodes AAMD = MemCpy->getAAMetadata();
441 if (AAMD)
442 NewI->setAAMetadata(AAMD);
443
444 IC.eraseInstFromFunction(*MemCpy);
445 WorkMap[MemCpy] = NewI;
446 } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(I)) {
447 auto *V = getReplacement(ASC->getPointerOperand());
448 assert(V && "Operand not replaced");
449 assert(isEqualOrValidAddrSpaceCast(
450 ASC, V->getType()->getPointerAddressSpace()) &&
451 "Invalid address space cast!");
452 auto *NewV = V;
453 if (V->getType()->getPointerAddressSpace() !=
454 ASC->getType()->getPointerAddressSpace()) {
455 auto *NewI = new AddrSpaceCastInst(V, ASC->getType(), "");
456 NewI->takeName(ASC);
457 IC.InsertNewInstWith(NewI, ASC->getIterator());
458 NewV = NewI;
459 }
460 IC.replaceInstUsesWith(*ASC, NewV);
461 IC.eraseInstFromFunction(*ASC);
462 } else {
463 llvm_unreachable("should never reach here");
464 }
465}
466
467void PointerReplacer::replacePointer(Value *V) {
468#ifndef NDEBUG
469 auto *PT = cast<PointerType>(Root.getType());
470 auto *NT = cast<PointerType>(V->getType());
471 assert(PT != NT && "Invalid usage");
472#endif
473 WorkMap[&Root] = V;
474
475 for (Instruction *Workitem : Worklist)
476 replace(Workitem);
477}
478
480 if (auto *I = simplifyAllocaArraySize(*this, AI, DT))
481 return I;
482
483 if (AI.getAllocatedType()->isSized()) {
484 // Move all allocas of zero-byte objects to the entry block and merge them
485 // together. Note that we only do this for allocas, because malloc should
486 // allocate and return a unique pointer, even for a zero-byte allocation.
488 // For a zero sized alloca there is no point in doing an array allocation.
489 // This is helpful if the array size is a complicated expression not used
490 // elsewhere.
491 if (AI.isArrayAllocation())
492 return replaceOperand(AI, 0,
494
495 // Get the first instruction in the entry block.
496 BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
497 Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
498 if (FirstInst != &AI) {
499 // If the entry block doesn't start with a zero-size alloca then move
500 // this one to the start of the entry block. There is no problem with
501 // dominance as the array size was forced to a constant earlier already.
502 AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
503 if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
505 .getKnownMinValue() != 0) {
506 AI.moveBefore(FirstInst);
507 return &AI;
508 }
509
510 // Replace this zero-sized alloca with the one at the start of the entry
511 // block after ensuring that the address will be aligned enough for both
512 // types.
513 const Align MaxAlign = std::max(EntryAI->getAlign(), AI.getAlign());
514 EntryAI->setAlignment(MaxAlign);
515 return replaceInstUsesWith(AI, EntryAI);
516 }
517 }
518 }
519
520 // Check to see if this allocation is only modified by a memcpy/memmove from
521 // a memory location whose alignment is equal to or exceeds that of the
522 // allocation. If this is the case, we can change all users to use the
523 // constant memory location instead. This is commonly produced by the CFE for
524 // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
525 // is only subsequently read.
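  // Illustrative example (hypothetical IR):
  //   %A = alloca [9 x i32]
  //   call void @llvm.memcpy.p0.p0.i64(ptr %A, ptr @__const.foo.A, i64 36, i1 false)
  //   %x = load i32, ptr %A
  // If %A is never written other than by the memcpy, its users can be
  // rewritten to use @__const.foo.A directly and the memcpy becomes dead.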
527 if (MemTransferInst *Copy = isOnlyCopiedFromConstantMemory(AA, &AI, ToDelete)) {
528 Value *TheSrc = Copy->getSource();
529 Align AllocaAlign = AI.getAlign();
530 Align SourceAlign = getOrEnforceKnownAlignment(
531 TheSrc, AllocaAlign, DL, &AI, &AC, &DT);
532 if (AllocaAlign <= SourceAlign &&
533 isDereferenceableForAllocaSize(TheSrc, &AI, DL) &&
534 !isa<Instruction>(TheSrc)) {
535 // FIXME: Can we sink instructions without violating dominance when TheSrc
536 // is an instruction instead of a constant or argument?
537 LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
538 LLVM_DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
539 unsigned SrcAddrSpace = TheSrc->getType()->getPointerAddressSpace();
540 if (AI.getAddressSpace() == SrcAddrSpace) {
541 for (Instruction *Delete : ToDelete)
542 eraseInstFromFunction(*Delete);
543
544 Instruction *NewI = replaceInstUsesWith(AI, TheSrc);
546 ++NumGlobalCopies;
547 return NewI;
548 }
549
550 PointerReplacer PtrReplacer(*this, AI, SrcAddrSpace);
551 if (PtrReplacer.collectUsers()) {
552 for (Instruction *Delete : ToDelete)
553 eraseInstFromFunction(*Delete);
554
555 PtrReplacer.replacePointer(TheSrc);
556 ++NumGlobalCopies;
557 }
558 }
559 }
560
561 // At last, use the generic allocation site handler to aggressively remove
562 // unused allocas.
563 return visitAllocSite(AI);
564}
565
566// Are we allowed to form an atomic load or store of this type?
567static bool isSupportedAtomicType(Type *Ty) {
568 return Ty->isIntOrPtrTy() || Ty->isFloatingPointTy();
569}
570
571/// Helper to combine a load to a new type.
572///
573/// This just does the work of combining a load to a new type. It handles
574/// metadata, etc., and returns the new instruction. The \c NewTy should be the
575/// loaded *value* type. This will convert it to a pointer, cast the operand to
576/// that pointer type, load it, etc.
577///
578/// Note that this will create all of the instructions with whatever insert
579/// point the \c InstCombinerImpl currently is using.
581 const Twine &Suffix) {
582 assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
583 "can't fold an atomic load to requested type");
584
585 LoadInst *NewLoad =
586 Builder.CreateAlignedLoad(NewTy, LI.getPointerOperand(), LI.getAlign(),
587 LI.isVolatile(), LI.getName() + Suffix);
588 NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
589 copyMetadataForLoad(*NewLoad, LI);
590 return NewLoad;
591}
592
593/// Combine a store to a new type.
594///
595/// Returns the newly created store instruction.
597 Value *V) {
598 assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
599 "can't fold an atomic store of requested type");
600
601 Value *Ptr = SI.getPointerOperand();
603 SI.getAllMetadata(MD);
604
605 StoreInst *NewStore =
606 IC.Builder.CreateAlignedStore(V, Ptr, SI.getAlign(), SI.isVolatile());
607 NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
608 for (const auto &MDPair : MD) {
609 unsigned ID = MDPair.first;
610 MDNode *N = MDPair.second;
611 // Note, essentially every kind of metadata should be preserved here! This
612 // routine is supposed to clone a store instruction changing *only its
613 // type*. The only metadata it makes sense to drop is metadata which is
614 // invalidated when the pointer type changes. This should essentially
615 // never be the case in LLVM, but we explicitly switch over only known
616 // metadata to be conservatively correct. If you are adding metadata to
617 // LLVM which pertains to stores, you almost certainly want to add it
618 // here.
619 switch (ID) {
620 case LLVMContext::MD_dbg:
621 case LLVMContext::MD_DIAssignID:
622 case LLVMContext::MD_tbaa:
623 case LLVMContext::MD_prof:
624 case LLVMContext::MD_fpmath:
625 case LLVMContext::MD_tbaa_struct:
626 case LLVMContext::MD_alias_scope:
627 case LLVMContext::MD_noalias:
628 case LLVMContext::MD_nontemporal:
629 case LLVMContext::MD_mem_parallel_loop_access:
630 case LLVMContext::MD_access_group:
631 // All of these directly apply.
632 NewStore->setMetadata(ID, N);
633 break;
634 case LLVMContext::MD_invariant_load:
635 case LLVMContext::MD_nonnull:
636 case LLVMContext::MD_noundef:
637 case LLVMContext::MD_range:
638 case LLVMContext::MD_align:
639 case LLVMContext::MD_dereferenceable:
640 case LLVMContext::MD_dereferenceable_or_null:
641 // These don't apply for stores.
642 break;
643 }
644 }
645
646 return NewStore;
647}
648
649/// Returns true if the instruction represents a minmax pattern like:
650/// select ((cmp load V1, load V2), V1, V2).
651static bool isMinMaxWithLoads(Value *V, Type *&LoadTy) {
652 assert(V->getType()->isPointerTy() && "Expected pointer type.");
653 // Ignore possible ty* to ixx* bitcast.
655 // Check that select is select ((cmp load V1, load V2), V1, V2) - minmax
656 // pattern.
658 Instruction *L1;
659 Instruction *L2;
660 Value *LHS;
661 Value *RHS;
662 if (!match(V, m_Select(m_Cmp(Pred, m_Instruction(L1), m_Instruction(L2)),
663 m_Value(LHS), m_Value(RHS))))
664 return false;
665 LoadTy = L1->getType();
666 return (match(L1, m_Load(m_Specific(LHS))) &&
667 match(L2, m_Load(m_Specific(RHS)))) ||
668 (match(L1, m_Load(m_Specific(RHS))) &&
669 match(L2, m_Load(m_Specific(LHS))));
670}
671
672/// Combine loads to match the type of their uses' value after looking
673/// through intervening bitcasts.
674///
675/// The core idea here is that if the result of a load is used in an operation,
676/// we should load the type most conducive to that operation. For example, when
677/// loading an integer and converting that immediately to a pointer, we should
678/// instead directly load a pointer.
679///
680/// However, this routine must never change the width of a load or the number of
681/// loads as that would introduce a semantic change. This combine is expected to
682/// be a semantic no-op which just allows loads to more closely model the types
683/// of their consuming operations.
684///
685/// Currently, we also refuse to change the precise type used for an atomic load
686/// or a volatile load. This is debatable, and might be reasonable to change
687/// later. However, it is risky in case some backend or other part of LLVM is
688/// relying on the exact type loaded to select appropriate atomic operations.
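///
/// Illustrative example (hypothetical IR):
///   %i = load i64, ptr %p
///   %d = bitcast i64 %i to double
/// becomes a direct load of double feeding the bitcast's users; the width and
/// number of loads are unchanged.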
690 LoadInst &Load) {
691 // FIXME: We could probably with some care handle both volatile and ordered
692 // atomic loads here but it isn't clear that this is important.
693 if (!Load.isUnordered())
694 return nullptr;
695
696 if (Load.use_empty())
697 return nullptr;
698
699 // swifterror values can't be bitcasted.
700 if (Load.getPointerOperand()->isSwiftError())
701 return nullptr;
702
703 // Fold away bit casts of the loaded value by loading the desired type.
704 // Note that we should not do this for pointer<->integer casts,
705 // because that would result in type punning.
706 if (Load.hasOneUse()) {
707 // Don't transform when the type is x86_amx; leaving it alone keeps the
708 // pass that lowers the x86_amx type happy.
709 Type *LoadTy = Load.getType();
710 if (auto *BC = dyn_cast<BitCastInst>(Load.user_back())) {
711 assert(!LoadTy->isX86_AMXTy() && "Load from x86_amx* should not happen!");
712 if (BC->getType()->isX86_AMXTy())
713 return nullptr;
714 }
715
716 if (auto *CastUser = dyn_cast<CastInst>(Load.user_back())) {
717 Type *DestTy = CastUser->getDestTy();
718 if (CastUser->isNoopCast(IC.getDataLayout()) &&
719 LoadTy->isPtrOrPtrVectorTy() == DestTy->isPtrOrPtrVectorTy() &&
720 (!Load.isAtomic() || isSupportedAtomicType(DestTy))) {
721 LoadInst *NewLoad = IC.combineLoadToNewType(Load, DestTy);
722 CastUser->replaceAllUsesWith(NewLoad);
723 IC.eraseInstFromFunction(*CastUser);
724 return &Load;
725 }
726 }
727 }
728
729 // FIXME: We should also canonicalize loads of vectors when their elements are
730 // cast to other types.
731 return nullptr;
732}
733
735 // FIXME: We could probably with some care handle both volatile and atomic
736 // stores here but it isn't clear that this is important.
737 if (!LI.isSimple())
738 return nullptr;
739
740 Type *T = LI.getType();
741 if (!T->isAggregateType())
742 return nullptr;
743
744 StringRef Name = LI.getName();
745
746 if (auto *ST = dyn_cast<StructType>(T)) {
747 // If the struct has only one element, we unpack it.
748 auto NumElements = ST->getNumElements();
749 if (NumElements == 1) {
750 LoadInst *NewLoad = IC.combineLoadToNewType(LI, ST->getTypeAtIndex(0U),
751 ".unpack");
752 NewLoad->setAAMetadata(LI.getAAMetadata());
754 PoisonValue::get(T), NewLoad, 0, Name));
755 }
756
757 // We don't want to break loads with padding here as we'd lose
758 // the knowledge that padding exists for the rest of the pipeline.
759 const DataLayout &DL = IC.getDataLayout();
760 auto *SL = DL.getStructLayout(ST);
761
762 // Don't unpack a structure containing a scalable vector.
763 if (SL->getSizeInBits().isScalable())
764 return nullptr;
765
766 if (SL->hasPadding())
767 return nullptr;
768
769 const auto Align = LI.getAlign();
770 auto *Addr = LI.getPointerOperand();
771 auto *IdxType = Type::getInt32Ty(T->getContext());
772 auto *Zero = ConstantInt::get(IdxType, 0);
773
775 for (unsigned i = 0; i < NumElements; i++) {
776 Value *Indices[2] = {
777 Zero,
778 ConstantInt::get(IdxType, i),
779 };
780 auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, ArrayRef(Indices),
781 Name + ".elt");
782 auto *L = IC.Builder.CreateAlignedLoad(
783 ST->getElementType(i), Ptr,
784 commonAlignment(Align, SL->getElementOffset(i)), Name + ".unpack");
785 // Propagate AA metadata. It'll still be valid on the narrowed load.
786 L->setAAMetadata(LI.getAAMetadata());
787 V = IC.Builder.CreateInsertValue(V, L, i);
788 }
789
790 V->setName(Name);
791 return IC.replaceInstUsesWith(LI, V);
792 }
793
794 if (auto *AT = dyn_cast<ArrayType>(T)) {
795 auto *ET = AT->getElementType();
796 auto NumElements = AT->getNumElements();
797 if (NumElements == 1) {
798 LoadInst *NewLoad = IC.combineLoadToNewType(LI, ET, ".unpack");
799 NewLoad->setAAMetadata(LI.getAAMetadata());
801 PoisonValue::get(T), NewLoad, 0, Name));
802 }
803
804 // Bail out if the array is too large. Ideally we would like to optimize
805 // arrays of arbitrary size but this has a terrible impact on compile time.
806 // The threshold here is chosen arbitrarily, maybe needs a little bit of
807 // tuning.
808 if (NumElements > IC.MaxArraySizeForCombine)
809 return nullptr;
810
811 const DataLayout &DL = IC.getDataLayout();
812 TypeSize EltSize = DL.getTypeAllocSize(ET);
813 const auto Align = LI.getAlign();
814
815 auto *Addr = LI.getPointerOperand();
816 auto *IdxType = Type::getInt64Ty(T->getContext());
817 auto *Zero = ConstantInt::get(IdxType, 0);
818
820 TypeSize Offset = TypeSize::get(0, ET->isScalableTy());
821 for (uint64_t i = 0; i < NumElements; i++) {
822 Value *Indices[2] = {
823 Zero,
824 ConstantInt::get(IdxType, i),
825 };
826 auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices),
827 Name + ".elt");
828 auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
829 auto *L = IC.Builder.CreateAlignedLoad(AT->getElementType(), Ptr,
830 EltAlign, Name + ".unpack");
831 L->setAAMetadata(LI.getAAMetadata());
832 V = IC.Builder.CreateInsertValue(V, L, i);
833 Offset += EltSize;
834 }
835
836 V->setName(Name);
837 return IC.replaceInstUsesWith(LI, V);
838 }
839
840 return nullptr;
841}
842
843// If we can determine that all possible objects pointed to by the provided
844// pointer value are not only dereferenceable but also definitively less than
845// or equal to the provided maximum size, then return true. Otherwise, return
846// false. (Constant global values and allocas are examples of objects whose
847// size can be determined this way.)
848// FIXME: This should probably live in ValueTracking (or similar).
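//
// Illustrative example (hypothetical IR): for
//   @g = private constant [4 x i8] zeroinitializer
// a query with MaxSize >= 4 succeeds, while a pointer of unknown provenance
// (e.g. an arbitrary function argument) makes this return false.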
850 const DataLayout &DL) {
852 SmallVector<Value *, 4> Worklist(1, V);
853
854 do {
855 Value *P = Worklist.pop_back_val();
856 P = P->stripPointerCasts();
857
858 if (!Visited.insert(P).second)
859 continue;
860
861 if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
862 Worklist.push_back(SI->getTrueValue());
863 Worklist.push_back(SI->getFalseValue());
864 continue;
865 }
866
867 if (PHINode *PN = dyn_cast<PHINode>(P)) {
868 append_range(Worklist, PN->incoming_values());
869 continue;
870 }
871
872 if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
873 if (GA->isInterposable())
874 return false;
875 Worklist.push_back(GA->getAliasee());
876 continue;
877 }
878
879 // If we know how big this object is, and it is less than MaxSize, continue
880 // searching. Otherwise, return false.
881 if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
882 if (!AI->getAllocatedType()->isSized())
883 return false;
884
885 ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
886 if (!CS)
887 return false;
888
889 TypeSize TS = DL.getTypeAllocSize(AI->getAllocatedType());
890 if (TS.isScalable())
891 return false;
892 // Make sure that, even if the multiplication below would wrap as a
893 // uint64_t, we still do the right thing.
894 if ((CS->getValue().zext(128) * APInt(128, TS.getFixedValue()))
895 .ugt(MaxSize))
896 return false;
897 continue;
898 }
899
900 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
901 if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
902 return false;
903
904 uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
905 if (InitSize > MaxSize)
906 return false;
907 continue;
908 }
909
910 return false;
911 } while (!Worklist.empty());
912
913 return true;
914}
915
916// If we're indexing into an object of a known size, and the outer index is
917// not a constant, but having any value but zero would lead to undefined
918// behavior, replace it with zero.
919//
920// For example, if we have:
921// @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
922// ...
923// %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
924// ... = load i32* %arrayidx, align 4
925// Then we know that we can replace %x in the GEP with i64 0.
926//
927// FIXME: We could fold any GEP index to zero that would cause UB if it were
928// not zero. Currently, we only handle the first such index. We could also
929// search through non-zero constant indices if we kept track of the
930// offsets those indices implied.
932 GetElementPtrInst *GEPI, Instruction *MemI,
933 unsigned &Idx) {
934 if (GEPI->getNumOperands() < 2)
935 return false;
936
937 // Find the first non-zero index of a GEP. If all indices are zero, return
938 // one past the last index.
939 auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
940 unsigned I = 1;
941 for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
942 Value *V = GEPI->getOperand(I);
943 if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
944 if (CI->isZero())
945 continue;
946
947 break;
948 }
949
950 return I;
951 };
952
953 // Skip through initial 'zero' indices, and find the corresponding pointer
954 // type. See if the next index is not a constant.
955 Idx = FirstNZIdx(GEPI);
956 if (Idx == GEPI->getNumOperands())
957 return false;
958 if (isa<Constant>(GEPI->getOperand(Idx)))
959 return false;
960
961 SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
962 Type *SourceElementType = GEPI->getSourceElementType();
963 // Size information about scalable vectors is not available, so we cannot
964 // deduce whether indexing at n is undefined behaviour or not. Bail out.
965 if (SourceElementType->isScalableTy())
966 return false;
967
968 Type *AllocTy = GetElementPtrInst::getIndexedType(SourceElementType, Ops);
969 if (!AllocTy || !AllocTy->isSized())
970 return false;
971 const DataLayout &DL = IC.getDataLayout();
972 uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy).getFixedValue();
973
974 // If there are more indices after the one we might replace with a zero, make
975 // sure they're all non-negative. If any of them are negative, the overall
976 // address being computed might be before the base address determined by the
977 // first non-zero index.
978 auto IsAllNonNegative = [&]() {
979 for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
980 KnownBits Known = IC.computeKnownBits(GEPI->getOperand(i), 0, MemI);
981 if (Known.isNonNegative())
982 continue;
983 return false;
984 }
985
986 return true;
987 };
988
989 // FIXME: If the GEP is not inbounds, and there are extra indices after the
990 // one we'll replace, those could cause the address computation to wrap
991 // (rendering the IsAllNonNegative() check below insufficient). We can do
992 // better, ignoring zero indices (and other indices we can prove small
993 // enough not to wrap).
994 if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
995 return false;
996
997 // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
998 // also known to be dereferenceable.
999 return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
1000 IsAllNonNegative();
1001}
1002
1003// If we're indexing into an object with a variable index for the memory
1004// access, but the object has only one element, we can assume that the index
1005// will always be zero. If we replace the GEP, return it.
1007 Instruction &MemI) {
1008 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
1009 unsigned Idx;
1010 if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
1011 Instruction *NewGEPI = GEPI->clone();
1012 NewGEPI->setOperand(Idx,
1013 ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
1014 IC.InsertNewInstBefore(NewGEPI, GEPI->getIterator());
1015 return NewGEPI;
1016 }
1017 }
1018
1019 return nullptr;
1020}
1021
1023 if (NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()))
1024 return false;
1025
1026 auto *Ptr = SI.getPointerOperand();
1027 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
1028 Ptr = GEPI->getOperand(0);
1029 return (isa<ConstantPointerNull>(Ptr) &&
1030 !NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()));
1031}
1032
1034 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
1035 const Value *GEPI0 = GEPI->getOperand(0);
1036 if (isa<ConstantPointerNull>(GEPI0) &&
1037 !NullPointerIsDefined(LI.getFunction(), GEPI->getPointerAddressSpace()))
1038 return true;
1039 }
1040 if (isa<UndefValue>(Op) ||
1041 (isa<ConstantPointerNull>(Op) &&
1043 return true;
1044 return false;
1045}
1046
1048 Value *Op = LI.getOperand(0);
1050 return replaceInstUsesWith(LI, Res);
1051
1052 // Try to canonicalize the loaded type.
1053 if (Instruction *Res = combineLoadToOperationType(*this, LI))
1054 return Res;
1055
1057 // Attempt to improve the alignment.
1058 Align KnownAlign = getOrEnforceKnownAlignment(
1059 Op, DL.getPrefTypeAlign(LI.getType()), DL, &LI, &AC, &DT);
1060 if (KnownAlign > LI.getAlign())
1061 LI.setAlignment(KnownAlign);
1062 }
1063
1064 // Replace GEP indices if possible.
1065 if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI))
1066 return replaceOperand(LI, 0, NewGEPI);
1067
1068 if (Instruction *Res = unpackLoadToAggregate(*this, LI))
1069 return Res;
1070
1071 // Do really simple store-to-load forwarding and load CSE, to catch cases
1072 // where there are several consecutive memory accesses to the same location,
1073 // separated by a few arithmetic operations.
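  // Illustrative example (hypothetical IR):
  //   store i32 %x, ptr %p
  //   %y = add i32 %a, %b
  //   %z = load i32, ptr %p
  // Here %z can be replaced by %x, forwarding the stored value across the
  // unrelated arithmetic.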
1074 bool IsLoadCSE = false;
1075 if (Value *AvailableVal = FindAvailableLoadedValue(&LI, *AA, &IsLoadCSE)) {
1076 if (IsLoadCSE)
1077 combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
1078
1079 return replaceInstUsesWith(
1080 LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
1081 LI.getName() + ".cast"));
1082 }
1083
1084 // None of the following transforms are legal for volatile/ordered atomic
1085 // loads. Most of them do apply for unordered atomics.
1086 if (!LI.isUnordered()) return nullptr;
1087
1088 // load(gep null, ...) -> unreachable
1089 // load null/undef -> unreachable
1090 // TODO: Consider a target hook for valid address spaces for these transforms.
1093 return replaceInstUsesWith(LI, PoisonValue::get(LI.getType()));
1094 }
1095
1096 if (Op->hasOneUse()) {
1097 // Change select and PHI nodes to select values instead of addresses: this
1098 // helps alias analysis out a lot, allows many other simplifications, and
1099 // exposes redundancy in the code.
1100 //
1101 // Note that we cannot do the transformation unless we know that the
1102 // introduced loads cannot trap! Something like this is valid as long as
1103 // the condition is always false: load (select bool %C, int* null, int* %G),
1104 // but it would not be valid if we transformed it to load from null
1105 // unconditionally.
1106 //
1107 if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
1108 // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
1109 Align Alignment = LI.getAlign();
1110 if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(),
1111 Alignment, DL, SI) &&
1112 isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(),
1113 Alignment, DL, SI)) {
1114 LoadInst *V1 =
1115 Builder.CreateLoad(LI.getType(), SI->getOperand(1),
1116 SI->getOperand(1)->getName() + ".val");
1117 LoadInst *V2 =
1118 Builder.CreateLoad(LI.getType(), SI->getOperand(2),
1119 SI->getOperand(2)->getName() + ".val");
1120 assert(LI.isUnordered() && "implied by above");
1121 V1->setAlignment(Alignment);
1122 V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1123 V2->setAlignment(Alignment);
1124 V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1125 return SelectInst::Create(SI->getCondition(), V1, V2);
1126 }
1127
1128 // load (select (cond, null, P)) -> load P
1129 if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
1130 !NullPointerIsDefined(SI->getFunction(),
1131 LI.getPointerAddressSpace()))
1132 return replaceOperand(LI, 0, SI->getOperand(2));
1133
1134 // load (select (cond, P, null)) -> load P
1135 if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
1136 !NullPointerIsDefined(SI->getFunction(),
1137 LI.getPointerAddressSpace()))
1138 return replaceOperand(LI, 0, SI->getOperand(1));
1139 }
1140 }
1141 return nullptr;
1142}
1143
1144/// Look for extractelement/insertvalue sequence that acts like a bitcast.
1145///
1146/// \returns underlying value that was "cast", or nullptr otherwise.
1147///
1148/// For example, if we have:
1149///
1150/// %E0 = extractelement <2 x double> %U, i32 0
1151/// %V0 = insertvalue [2 x double] undef, double %E0, 0
1152/// %E1 = extractelement <2 x double> %U, i32 1
1153/// %V1 = insertvalue [2 x double] %V0, double %E1, 1
1154///
1155/// and the layout of a <2 x double> is isomorphic to a [2 x double],
1156/// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
1157/// Note that %U may contain non-undef values where %V1 has undef.
1159 Value *U = nullptr;
1160 while (auto *IV = dyn_cast<InsertValueInst>(V)) {
1161 auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
1162 if (!E)
1163 return nullptr;
1164 auto *W = E->getVectorOperand();
1165 if (!U)
1166 U = W;
1167 else if (U != W)
1168 return nullptr;
1169 auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
1170 if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
1171 return nullptr;
1172 V = IV->getAggregateOperand();
1173 }
1174 if (!match(V, m_Undef()) || !U)
1175 return nullptr;
1176
1177 auto *UT = cast<VectorType>(U->getType());
1178 auto *VT = V->getType();
1179 // Check that types UT and VT are bitwise isomorphic.
1180 const auto &DL = IC.getDataLayout();
1181 if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) {
1182 return nullptr;
1183 }
1184 if (auto *AT = dyn_cast<ArrayType>(VT)) {
1185 if (AT->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
1186 return nullptr;
1187 } else {
1188 auto *ST = cast<StructType>(VT);
1189 if (ST->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
1190 return nullptr;
1191 for (const auto *EltT : ST->elements()) {
1192 if (EltT != UT->getElementType())
1193 return nullptr;
1194 }
1195 }
1196 return U;
1197}
1198
1199/// Combine stores to match the type of value being stored.
1200///
1201/// The core idea here is that the memory does not have any intrinsic type and
1202/// where we can, we should match the type of a store to the type of the value
1203/// being stored.
1204///
1205/// However, this routine must never change the width of a store or the number of
1206/// stores as that would introduce a semantic change. This combine is expected to
1207/// be a semantic no-op which just allows stores to more closely model the types
1208/// of their incoming values.
1209///
1210/// Currently, we also refuse to change the precise type used for an atomic or
1211/// volatile store. This is debatable, and might be reasonable to change later.
1212/// However, it is risky in case some backend or other part of LLVM is relying
1213/// on the exact type stored to select appropriate atomic operations.
1214///
1215/// \returns true if the store was successfully combined away. This indicates
1216/// the caller must erase the store instruction. We have to let the caller erase
1217/// the store instruction as otherwise there is no way to signal whether it was
1218/// combined or not: IC.EraseInstFromFunction returns a null pointer.
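///
/// Illustrative example (hypothetical IR):
///   %d = bitcast i64 %v to double
///   store double %d, ptr %p
/// is rewritten to store the original i64 value, dropping the bitcast while
/// keeping the store width unchanged.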
1220 // FIXME: We could probably with some care handle both volatile and ordered
1221 // atomic stores here but it isn't clear that this is important.
1222 if (!SI.isUnordered())
1223 return false;
1224
1225 // swifterror values can't be bitcasted.
1226 if (SI.getPointerOperand()->isSwiftError())
1227 return false;
1228
1229 Value *V = SI.getValueOperand();
1230
1231 // Fold away bit casts of the stored value by storing the original type.
1232 if (auto *BC = dyn_cast<BitCastInst>(V)) {
1233 assert(!BC->getType()->isX86_AMXTy() &&
1234 "store to x86_amx* should not happen!");
1235 V = BC->getOperand(0);
1236 // Don't transform when the type is x86_amx; leaving it alone keeps the
1237 // pass that lowers the x86_amx type happy.
1238 if (V->getType()->isX86_AMXTy())
1239 return false;
1240 if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
1241 combineStoreToNewValue(IC, SI, V);
1242 return true;
1243 }
1244 }
1245
1246 if (Value *U = likeBitCastFromVector(IC, V))
1247 if (!SI.isAtomic() || isSupportedAtomicType(U->getType())) {
1248 combineStoreToNewValue(IC, SI, U);
1249 return true;
1250 }
1251
1252 // FIXME: We should also canonicalize stores of vectors when their elements
1253 // are cast to other types.
1254 return false;
1255}
1256
1258 // FIXME: We could probably with some care handle both volatile and atomic
1259 // stores here but it isn't clear that this is important.
1260 if (!SI.isSimple())
1261 return false;
1262
1263 Value *V = SI.getValueOperand();
1264 Type *T = V->getType();
1265
1266 if (!T->isAggregateType())
1267 return false;
1268
1269 if (auto *ST = dyn_cast<StructType>(T)) {
1270 // If the struct has only one element, we unpack it.
1271 unsigned Count = ST->getNumElements();
1272 if (Count == 1) {
1273 V = IC.Builder.CreateExtractValue(V, 0);
1274 combineStoreToNewValue(IC, SI, V);
1275 return true;
1276 }
1277
1278 // We don't want to break stores with padding here as we'd lose
1279 // the knowledge that padding exists for the rest of the pipeline.
1280 const DataLayout &DL = IC.getDataLayout();
1281 auto *SL = DL.getStructLayout(ST);
1282
1283 // Don't unpack a structure containing a scalable vector.
1284 if (SL->getSizeInBits().isScalable())
1285 return false;
1286
1287 if (SL->hasPadding())
1288 return false;
1289
1290 const auto Align = SI.getAlign();
1291
1292 SmallString<16> EltName = V->getName();
1293 EltName += ".elt";
1294 auto *Addr = SI.getPointerOperand();
1295 SmallString<16> AddrName = Addr->getName();
1296 AddrName += ".repack";
1297
1298 auto *IdxType = Type::getInt32Ty(ST->getContext());
1299 auto *Zero = ConstantInt::get(IdxType, 0);
1300 for (unsigned i = 0; i < Count; i++) {
1301 Value *Indices[2] = {
1302 Zero,
1303 ConstantInt::get(IdxType, i),
1304 };
1305 auto *Ptr =
1306 IC.Builder.CreateInBoundsGEP(ST, Addr, ArrayRef(Indices), AddrName);
1307 auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1308 auto EltAlign = commonAlignment(Align, SL->getElementOffset(i));
1309 llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1310 NS->setAAMetadata(SI.getAAMetadata());
1311 }
1312
1313 return true;
1314 }
1315
1316 if (auto *AT = dyn_cast<ArrayType>(T)) {
1317 // If the array has only one element, we unpack it.
1318 auto NumElements = AT->getNumElements();
1319 if (NumElements == 1) {
1320 V = IC.Builder.CreateExtractValue(V, 0);
1321 combineStoreToNewValue(IC, SI, V);
1322 return true;
1323 }
1324
1325 // Bail out if the array is too large. Ideally we would like to optimize
1326 // arrays of arbitrary size but this has a terrible impact on compile time.
1327 // The threshold here is chosen arbitrarily, maybe needs a little bit of
1328 // tuning.
1329 if (NumElements > IC.MaxArraySizeForCombine)
1330 return false;
1331
1332 const DataLayout &DL = IC.getDataLayout();
1333 TypeSize EltSize = DL.getTypeAllocSize(AT->getElementType());
1334 const auto Align = SI.getAlign();
1335
1336 SmallString<16> EltName = V->getName();
1337 EltName += ".elt";
1338 auto *Addr = SI.getPointerOperand();
1339 SmallString<16> AddrName = Addr->getName();
1340 AddrName += ".repack";
1341
1342 auto *IdxType = Type::getInt64Ty(T->getContext());
1343 auto *Zero = ConstantInt::get(IdxType, 0);
1344
1345 TypeSize Offset = TypeSize::get(0, AT->getElementType()->isScalableTy());
1346 for (uint64_t i = 0; i < NumElements; i++) {
1347 Value *Indices[2] = {
1348 Zero,
1349 ConstantInt::get(IdxType, i),
1350 };
1351 auto *Ptr =
1352 IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices), AddrName);
1353 auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1354 auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
1355 Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1356 NS->setAAMetadata(SI.getAAMetadata());
1357 Offset += EltSize;
1358 }
1359
1360 return true;
1361 }
1362
1363 return false;
1364}
1365
1366/// equivalentAddressValues - Test if A and B will obviously have the same
1367/// value. This includes recognizing that %t0 and %t1 will have the same
1368/// value in code like this:
1369/// %t0 = getelementptr \@a, 0, 3
1370/// store i32 0, i32* %t0
1371/// %t1 = getelementptr \@a, 0, 3
1372/// %t2 = load i32* %t1
1373///
1375 // Test if the values are trivially equivalent.
1376 if (A == B) return true;
1377
1378 // Test if the values come from identical arithmetic instructions.
1379 // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
1380 // it's only used to compare two uses within the same basic block, which
1381 // means that they'll always either have the same value or one of them
1382 // will have an undefined value.
1383 if (isa<BinaryOperator>(A) ||
1384 isa<CastInst>(A) ||
1385 isa<PHINode>(A) ||
1386 isa<GetElementPtrInst>(A))
1387 if (Instruction *BI = dyn_cast<Instruction>(B))
1388 if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
1389 return true;
1390
1391 // Otherwise they may not be equivalent.
1392 return false;
1393}
1394
1395/// Converts store (bitcast (load (bitcast (select ...)))) to
1396/// store (load (select ...)), where select is minmax:
1397/// select ((cmp load V1, load V2), V1, V2).
1399 StoreInst &SI) {
1400 // bitcast?
1401 if (!match(SI.getPointerOperand(), m_BitCast(m_Value())))
1402 return false;
1403 // load? integer?
1404 Value *LoadAddr;
1405 if (!match(SI.getValueOperand(), m_Load(m_BitCast(m_Value(LoadAddr)))))
1406 return false;
1407 auto *LI = cast<LoadInst>(SI.getValueOperand());
1408 if (!LI->getType()->isIntegerTy())
1409 return false;
1410 Type *CmpLoadTy;
1411 if (!isMinMaxWithLoads(LoadAddr, CmpLoadTy))
1412 return false;
1413
1414 // Make sure the type would actually change.
1415 // This condition can be hit with chains of bitcasts.
1416 if (LI->getType() == CmpLoadTy)
1417 return false;
1418
1419 // Make sure we're not changing the size of the load/store.
1420 const auto &DL = IC.getDataLayout();
1421 if (DL.getTypeStoreSizeInBits(LI->getType()) !=
1422 DL.getTypeStoreSizeInBits(CmpLoadTy))
1423 return false;
1424
1425 if (!all_of(LI->users(), [LI, LoadAddr](User *U) {
1426 auto *SI = dyn_cast<StoreInst>(U);
1427 return SI && SI->getPointerOperand() != LI &&
1428 InstCombiner::peekThroughBitcast(SI->getPointerOperand()) !=
1429 LoadAddr &&
1430 !SI->getPointerOperand()->isSwiftError();
1431 }))
1432 return false;
1433
1434 IC.Builder.SetInsertPoint(LI);
1435 LoadInst *NewLI = IC.combineLoadToNewType(*LI, CmpLoadTy);
1436 // Replace all the stores with stores of the newly loaded value.
1437 for (auto *UI : LI->users()) {
1438 auto *USI = cast<StoreInst>(UI);
1439 IC.Builder.SetInsertPoint(USI);
1440 combineStoreToNewValue(IC, *USI, NewLI);
1441 }
1442 IC.replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
1443 IC.eraseInstFromFunction(*LI);
1444 return true;
1445}
1446
1448 Value *Val = SI.getOperand(0);
1449 Value *Ptr = SI.getOperand(1);
1450
1451 // Try to canonicalize the stored type.
1452 if (combineStoreToValueType(*this, SI))
1453 return eraseInstFromFunction(SI);
1454
1456 // Attempt to improve the alignment.
1457 const Align KnownAlign = getOrEnforceKnownAlignment(
1458 Ptr, DL.getPrefTypeAlign(Val->getType()), DL, &SI, &AC, &DT);
1459 if (KnownAlign > SI.getAlign())
1460 SI.setAlignment(KnownAlign);
1461 }
1462
1463 // Try to canonicalize the stored type.
1464 if (unpackStoreToAggregate(*this, SI))
1465 return eraseInstFromFunction(SI);
1466
1468 return eraseInstFromFunction(SI);
1469
1470 // Replace GEP indices if possible.
1471 if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI))
1472 return replaceOperand(SI, 1, NewGEPI);
1473
1474 // Don't hack volatile/ordered stores.
1475 // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
1476 if (!SI.isUnordered()) return nullptr;
1477
1478 // If the RHS is an alloca with a single use, zapify the store, making the
1479 // alloca dead.
1480 if (Ptr->hasOneUse()) {
1481 if (isa<AllocaInst>(Ptr))
1482 return eraseInstFromFunction(SI);
1483 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
1484 if (isa<AllocaInst>(GEP->getOperand(0))) {
1485 if (GEP->getOperand(0)->hasOneUse())
1486 return eraseInstFromFunction(SI);
1487 }
1488 }
1489 }
1490
1491 // If we have a store to a location which is known constant, we can conclude
1492 // that the store must be storing the constant value (else the memory
1493 // wouldn't be constant), and this must be a noop.
1495 return eraseInstFromFunction(SI);
1496
1497 // Do really simple DSE, to catch cases where there are several consecutive
1498 // stores to the same location, separated by a few arithmetic operations. This
1499 // situation often occurs with bitfield accesses.
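  // Illustrative example (hypothetical IR):
  //   store i32 %a, ptr %p
  //   %t = shl i32 %b, 3
  //   store i32 %t, ptr %p
  // The first store is dead; the backwards scan below removes it because the
  // intervening instruction neither reads nor writes memory.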
1500 BasicBlock::iterator BBI(SI);
1501 for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
1502 --ScanInsts) {
1503 --BBI;
1504 // Don't count debug info directives, lest they affect codegen,
1505 // and we skip pointer-to-pointer bitcasts, which are NOPs.
1506 if (BBI->isDebugOrPseudoInst()) {
1507 ScanInsts++;
1508 continue;
1509 }
1510
1511 if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
1512 // Prev store isn't volatile, and stores to the same location?
1513 if (PrevSI->isUnordered() &&
1514 equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1)) &&
1515 PrevSI->getValueOperand()->getType() ==
1516 SI.getValueOperand()->getType()) {
1517 ++NumDeadStore;
1518 // Manually add back the original store to the worklist now, so it will
1519 // be processed after the operands of the removed store, as this may
1520 // expose additional DSE opportunities.
1521 Worklist.push(&SI);
1522 eraseInstFromFunction(*PrevSI);
1523 return nullptr;
1524 }
1525 break;
1526 }
1527
1528 // If this is a load, we have to stop. However, if the loaded value is from
1529 // the pointer we're loading and is producing the pointer we're storing,
1530 // then *this* store is dead (X = load P; store X -> P).
1531 if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
1532 if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
1533 assert(SI.isUnordered() && "can't eliminate ordering operation");
1534 return eraseInstFromFunction(SI);
1535 }
1536
1537 // Otherwise, this is a load from some other location. Stores before it
1538 // may not be dead.
1539 break;
1540 }
1541
1542 // Don't skip over loads, throws or things that can modify memory.
1543 if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory() || BBI->mayThrow())
1544 break;
1545 }
1546
1547 // store X, null -> turns into 'unreachable' in SimplifyCFG
1548 // store X, GEP(null, Y) -> turns into 'unreachable' in SimplifyCFG
1549 if (canSimplifyNullStoreOrGEP(SI)) {
1550 if (!isa<PoisonValue>(Val))
1551 return replaceOperand(SI, 0, PoisonValue::get(Val->getType()));
1552 return nullptr; // Do not modify these!
1553 }
1554
1555 // This is a non-terminator unreachable marker. Don't remove it.
1556 if (isa<UndefValue>(Ptr)) {
1557 // Remove guaranteed-to-transfer instructions before the marker.
1559 return &SI;
1560
1561 // Remove all instructions after the marker and handle dead blocks this
1562 // implies.
1564 handleUnreachableFrom(SI.getNextNode(), Worklist);
1566 return nullptr;
1567 }
1568
1569 // store undef, Ptr -> noop
1570 // FIXME: This is technically incorrect because it might overwrite a poison
1571 // value. Change to PoisonValue once #52930 is resolved.
1572 if (isa<UndefValue>(Val))
1573 return eraseInstFromFunction(SI);
1574
1575 return nullptr;
1576}
1577
1578/// Try to transform:
1579/// if () { *P = v1; } else { *P = v2 }
1580/// or:
1581/// *P = v1; if () { *P = v2; }
1582/// into a phi node with a store in the successor.
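///
/// Illustrative example (hypothetical IR): stores of %v1 (in the 'then' block)
/// and %v2 (in the 'else' block) to the same pointer %P become
///   %storemerge = phi i32 [ %v1, %then ], [ %v2, %else ]
///   store i32 %storemerge, ptr %P
/// in the common successor.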
1584 if (!SI.isUnordered())
1585 return false; // This code has not been audited for volatile/ordered case.
1586
1587 // Check if the successor block has exactly 2 incoming edges.
1588 BasicBlock *StoreBB = SI.getParent();
1589 BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
1590 if (!DestBB->hasNPredecessors(2))
1591 return false;
1592
1593 // Capture the other block (the block that doesn't contain our store).
1594 pred_iterator PredIter = pred_begin(DestBB);
1595 if (*PredIter == StoreBB)
1596 ++PredIter;
1597 BasicBlock *OtherBB = *PredIter;
1598
1599 // Bail out if all of the relevant blocks aren't distinct. This can happen,
1600 // for example, if SI is in an infinite loop.
1601 if (StoreBB == DestBB || OtherBB == DestBB)
1602 return false;
1603
1604 // Verify that the other block ends in a branch and is not otherwise empty.
1605 BasicBlock::iterator BBI(OtherBB->getTerminator());
1606 BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
1607 if (!OtherBr || BBI == OtherBB->begin())
1608 return false;
1609
1610 auto OtherStoreIsMergeable = [&](StoreInst *OtherStore) -> bool {
1611 if (!OtherStore ||
1612 OtherStore->getPointerOperand() != SI.getPointerOperand())
1613 return false;
1614
1615 auto *SIVTy = SI.getValueOperand()->getType();
1616 auto *OSVTy = OtherStore->getValueOperand()->getType();
1617 return CastInst::isBitOrNoopPointerCastable(OSVTy, SIVTy, DL) &&
1618 SI.hasSameSpecialState(OtherStore);
1619 };
1620
1621 // If the other block ends in an unconditional branch, check for the 'if then
1622 // else' case. There is an instruction before the branch.
1623 StoreInst *OtherStore = nullptr;
1624 if (OtherBr->isUnconditional()) {
1625 --BBI;
1626 // Skip over debugging info and pseudo probes.
1627 while (BBI->isDebugOrPseudoInst()) {
1628 if (BBI==OtherBB->begin())
1629 return false;
1630 --BBI;
1631 }
1632 // If this isn't a store, isn't a store to the same location, or is not the
1633 // right kind of store, bail out.
1634 OtherStore = dyn_cast<StoreInst>(BBI);
1635 if (!OtherStoreIsMergeable(OtherStore))
1636 return false;
1637 } else {
1638 // Otherwise, the other block ended with a conditional branch. If one of the
1639 // destinations is StoreBB, then we have the if/then case.
1640 if (OtherBr->getSuccessor(0) != StoreBB &&
1641 OtherBr->getSuccessor(1) != StoreBB)
1642 return false;
1643
1644 // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
1645 // if/then triangle. See if there is a store to the same ptr as SI that
1646 // lives in OtherBB.
1647 for (;; --BBI) {
1648 // Check to see if we find the matching store.
1649 OtherStore = dyn_cast<StoreInst>(BBI);
1650 if (OtherStoreIsMergeable(OtherStore))
1651 break;
1652
1653 // If we find something that may be using or overwriting the stored
1654 // value, or if we run out of instructions, we can't do the transform.
1655 if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
1656 BBI->mayWriteToMemory() || BBI == OtherBB->begin())
1657 return false;
1658 }
1659
1660 // In order to eliminate the store in OtherBr, we have to make sure nothing
1661 // reads or overwrites the stored value in StoreBB.
1662 for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
1663 // FIXME: This should really be AA driven.
1664 if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory())
1665 return false;
1666 }
1667 }
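// [Editor's note: illustrative sketch, not part of the upstream file.] For the
// if/then triangle handled above, the CFG looks roughly like this (with a
// hypothetical pointer %p and values %v1, %v2):
//
//   OtherBB:
//     store i32 %v1, ptr %p
//     br i1 %c, label %StoreBB, label %DestBB
//   StoreBB:                               ; contains SI
//     store i32 %v2, ptr %p
//     br label %DestBB
//
// The scans above ensure that nothing between OtherBB's store and its branch,
// and nothing in StoreBB before SI, reads or writes memory or may throw, so
// both stores can be sunk into DestBB behind a phi.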
1668
1669 // Insert a PHI node now if we need it.
1670 Value *MergedVal = OtherStore->getValueOperand();
1671 // The debug locations of the original instructions might differ. Merge them.
1672 DebugLoc MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(),
1673 OtherStore->getDebugLoc());
1674 if (MergedVal != SI.getValueOperand()) {
1675 PHINode *PN =
1676 PHINode::Create(SI.getValueOperand()->getType(), 2, "storemerge");
1677 PN->addIncoming(SI.getValueOperand(), SI.getParent());
1678 Builder.SetInsertPoint(OtherStore);
1679 PN->addIncoming(Builder.CreateBitOrPointerCast(MergedVal, PN->getType()),
1680 OtherBB);
1681 MergedVal = InsertNewInstBefore(PN, DestBB->begin());
1682 PN->setDebugLoc(MergedLoc);
1683 }
1684
1685 // Advance to a place where it is safe to insert the new store and insert it.
1686 BBI = DestBB->getFirstInsertionPt();
1687 StoreInst *NewSI =
1688 new StoreInst(MergedVal, SI.getOperand(1), SI.isVolatile(), SI.getAlign(),
1689 SI.getOrdering(), SI.getSyncScopeID());
1690 InsertNewInstBefore(NewSI, BBI);
1691 NewSI->setDebugLoc(MergedLoc);
1692 NewSI->mergeDIAssignID({&SI, OtherStore});
1693
1694 // If the two stores had AA tags, merge them.
1695 AAMDNodes AATags = SI.getAAMetadata();
1696 if (AATags)
1697 NewSI->setAAMetadata(AATags.merge(OtherStore->getAAMetadata()));
1698
1699 // Nuke the old stores.
1700 eraseInstFromFunction(SI);
1701 eraseInstFromFunction(*OtherStore);
1702 return true;
1703}
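// [Editor's note: illustrative sketch, not part of the upstream file.] For the
// if/then/else (diamond) case, the overall effect of mergeStoreIntoSuccessor()
// is roughly the following (hypothetical values %c, %v1, %v2 and pointer %p):
//
//   ; before
//   then:                                  ; StoreBB, contains SI
//     store i32 %v1, ptr %p
//     br label %merge
//   else:                                  ; OtherBB
//     store i32 %v2, ptr %p
//     br label %merge
//   merge:                                 ; DestBB
//     ...
//
//   ; after
//   then:
//     br label %merge
//   else:
//     br label %merge
//   merge:
//     %storemerge = phi i32 [ %v1, %then ], [ %v2, %else ]
//     store i32 %storemerge, ptr %p
//     ...
//
// The new store inherits SI's volatility, alignment, ordering, sync scope, and
// the merged debug location and AA metadata, and both original stores are erased.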