LLVM 17.0.0git
LoadStoreOpt.cpp
Go to the documentation of this file.
1//===- LoadStoreOpt.cpp ----------- Generic memory optimizations -*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the LoadStoreOpt optimization pass.
10//===----------------------------------------------------------------------===//
11
13#include "llvm/ADT/Statistic.h"
35#include "llvm/Support/Debug.h"
38#include <algorithm>
39
40#define DEBUG_TYPE "loadstore-opt"
41
42using namespace llvm;
43using namespace ore;
44using namespace MIPatternMatch;
45
46STATISTIC(NumStoresMerged, "Number of stores merged");
47
48const unsigned MaxStoreSizeToForm = 128;
49
50char LoadStoreOpt::ID = 0;
51INITIALIZE_PASS_BEGIN(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
52 false, false)
55
57 : MachineFunctionPass(ID), DoNotRunPass(F) {}
58
60 : LoadStoreOpt([](const MachineFunction &) { return false; }) {}
61
// Per-function setup: cache the MachineFunction, its register info and the
// alias-analysis results, point the MIRBuilder at this function, and clear
// state left over from a previous function.
// NOTE(review): original lines 66-67 and 70 (additional member setup and the
// MachineFunctionProperties enum argument) are elided in this excerpt.
62void LoadStoreOpt::init(MachineFunction &MF) {
63 this->MF = &MF;
64 MRI = &MF.getRegInfo();
65 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
68 Builder.setMF(MF);
// Record the phase we run in: before the legalizer we may form store types
// the legalizer would still break down (see isLegalOrBeforeLegalizer()).
69 IsPreLegalizer = !MF.getProperties().hasProperty(
71 InstsToErase.clear();
72}
73
// This pass rewrites stores in place and invalidates no other analyses.
// NOTE(review): the function signature and the AAResultsWrapperPass
// requirement on the preceding original lines (74-75, 77-78) are elided here.
76 AU.setPreservesAll();
79}
80
// Decompose a pointer vreg into BaseReg [+ IndexReg] [+ constant Offset] so
// callers can reason about relative addresses of memory accesses.
// NOTE(review): the function signature and the declaration of the returned
// BaseIndexOffset 'Info' (original lines 81-83) are elided in this excerpt.
84 Register PtrAddRHS;
// Only a single G_PTR_ADD is recognized; any other pointer definition is
// treated as an opaque base with no index and no constant offset.
85 if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(Info.BaseReg), m_Reg(PtrAddRHS)))) {
86 Info.BaseReg = Ptr;
87 Info.IndexReg = Register();
88 Info.IsIndexSignExt = false;
89 return Info;
90 }
91
// If the ptr_add RHS is a (possibly copy-obscured) G_CONSTANT, record its
// sign-extended value as the byte offset.
92 auto RHSCst = getIConstantVRegValWithLookThrough(PtrAddRHS, MRI);
93 if (RHSCst)
94 Info.Offset = RHSCst->Value.getSExtValue();
95
96 // Just recognize a simple case for now. In future we'll need to match
97 // indexing patterns for base + index + constant.
// NOTE(review): IndexReg is populated even when the RHS folded to a
// constant; consumers appear to compare BaseReg/Offset only — confirm.
98 Info.IndexReg = PtrAddRHS;
99 Info.IsIndexSignExt = false;
100 return Info;
101}
102
// Decide aliasing between two G_LOAD/G_STORE instructions when it can be
// proven either way. Returns true (with IsAlias set) only when the answer is
// known; returns false when nothing can be concluded.
// NOTE(review): the opening of the signature (original line 103, taking MI1)
// and line 106 are elided in this excerpt.
104 const MachineInstr &MI2,
105 bool &IsAlias,
// Only generic load/store instructions are analyzable here.
107 auto *LdSt1 = dyn_cast<GLoadStore>(&MI1);
108 auto *LdSt2 = dyn_cast<GLoadStore>(&MI2);
109 if (!LdSt1 || !LdSt2)
110 return false;
111
// Split each pointer into base register + constant offset.
112 BaseIndexOffset BasePtr0 = getPointerInfo(LdSt1->getPointerReg(), MRI);
113 BaseIndexOffset BasePtr1 = getPointerInfo(LdSt2->getPointerReg(), MRI);
114
115 if (!BasePtr0.BaseReg.isValid() || !BasePtr1.BaseReg.isValid())
116 return false;
117
118 int64_t Size1 = LdSt1->getMemSize();
119 int64_t Size2 = LdSt2->getMemSize();
120
121 int64_t PtrDiff;
// Same base register: compare the constant offsets and access sizes
// directly, interval-overlap style.
122 if (BasePtr0.BaseReg == BasePtr1.BaseReg) {
123 PtrDiff = BasePtr1.Offset - BasePtr0.Offset;
124 // If the size of memory access is unknown, do not use it to do analysis.
125 // One example of unknown size memory access is to load/store scalable
126 // vector objects on the stack.
127 // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
128 // following situations arise:
129 if (PtrDiff >= 0 &&
130 Size1 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
131 // [----BasePtr0----]
132 // [---BasePtr1--]
133 // ========PtrDiff========>
134 IsAlias = !(Size1 <= PtrDiff);
135 return true;
136 }
137 if (PtrDiff < 0 &&
138 Size2 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
139 // [----BasePtr0----]
140 // [---BasePtr1--]
141 // =====(-PtrDiff)====>
142 IsAlias = !((PtrDiff + Size2) <= 0);
143 return true;
144 }
// Same base but unknown size on the relevant side: inconclusive.
145 return false;
146 }
147
148 // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
149 // able to calculate their relative offset if at least one arises
150 // from an alloca. However, these allocas cannot overlap and we
151 // can infer there is no alias.
152 auto *Base0Def = getDefIgnoringCopies(BasePtr0.BaseReg, MRI);
153 auto *Base1Def = getDefIgnoringCopies(BasePtr1.BaseReg, MRI);
154 if (!Base0Def || !Base1Def)
155 return false; // Couldn't tell anything.
156
157
// Different kinds of base definitions: no conclusion possible.
158 if (Base0Def->getOpcode() != Base1Def->getOpcode())
159 return false;
160
161 if (Base0Def->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
162 MachineFrameInfo &MFI = Base0Def->getMF()->getFrameInfo();
163 // If the bases have the same frame index but we couldn't find a
164 // constant offset, (indices are different) be conservative.
// Distinct non-fixed (alloca) frame slots never overlap -> no alias.
165 if (Base0Def != Base1Def &&
166 (!MFI.isFixedObjectIndex(Base0Def->getOperand(1).getIndex()) ||
167 !MFI.isFixedObjectIndex(Base1Def->getOperand(1).getIndex()))) {
168 IsAlias = false;
169 return true;
170 }
171 }
172
173 // This implementation is a lot more primitive than the SDAG one for now.
174 // FIXME: what about constant pools?
175 if (Base0Def->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
176 auto GV0 = Base0Def->getOperand(1).getGlobal();
177 auto GV1 = Base1Def->getOperand(1).getGlobal();
// Distinct global objects occupy distinct storage -> no alias.
178 if (GV0 != GV1) {
179 IsAlias = false;
180 return true;
181 }
182 }
183
184 // Can't tell anything about aliasing.
185 return false;
186}
187
// Conservative may-alias query between MI and Other, mirroring the structure
// of SelectionDAG's BaseIndexOffset::isAlias. Returns true unless aliasing
// can be ruled out.
// NOTE(review): original lines 188 (signature start, taking MI), 190, 198
// (the MachineMemOperand* member decl), 213 (the Size computation), 253 (the
// aliasIsKnownForLoadStore call) and 266-267 (extra guards on the AA path)
// are elided in this excerpt.
189 const MachineInstr &Other,
191 AliasAnalysis *AA) {
// Per-instruction summary of the memory behavior we need for the checks
// below.
192 struct MemUseCharacteristics {
193 bool IsVolatile;
194 bool IsAtomic;
195 Register BasePtr;
196 int64_t Offset;
197 uint64_t NumBytes;
199 };
200
201 auto getCharacteristics =
202 [&](const MachineInstr *MI) -> MemUseCharacteristics {
203 if (const auto *LS = dyn_cast<GLoadStore>(MI)) {
204 Register BaseReg;
205 int64_t Offset = 0;
206 // No pre/post-inc addressing modes are considered here, unlike in SDAG.
207 if (!mi_match(LS->getPointerReg(), MRI,
208 m_GPtrAdd(m_Reg(BaseReg), m_ICst(Offset)))) {
209 BaseReg = LS->getPointerReg();
210 Offset = 0;
211 }
212
214 LS->getMMO().getMemoryType().getSizeInBytes());
215 return {LS->isVolatile(), LS->isAtomic(), BaseReg,
216 Offset /*base offset*/, Size, &LS->getMMO()};
217 }
218 // FIXME: support recognizing lifetime instructions.
219 // Default.
// Non-load/store instructions get a neutral summary; later checks then
// fall through to the conservative "may alias" answer.
220 return {false /*isvolatile*/,
221 /*isAtomic*/ false, Register(),
222 (int64_t)0 /*offset*/, 0 /*size*/,
223 (MachineMemOperand *)nullptr};
224 };
225 MemUseCharacteristics MUC0 = getCharacteristics(&MI),
226 MUC1 = getCharacteristics(&Other);
227
228 // If they are to the same address, then they must be aliases.
229 if (MUC0.BasePtr.isValid() && MUC0.BasePtr == MUC1.BasePtr &&
230 MUC0.Offset == MUC1.Offset)
231 return true;
232
233 // If they are both volatile then they cannot be reordered.
234 if (MUC0.IsVolatile && MUC1.IsVolatile)
235 return true;
236
237 // Be conservative about atomics for the moment
238 // TODO: This is way overconservative for unordered atomics (see D66309)
239 if (MUC0.IsAtomic && MUC1.IsAtomic)
240 return true;
241
242 // If one operation reads from invariant memory, and the other may store, they
243 // cannot alias.
244 if (MUC0.MMO && MUC1.MMO) {
245 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
246 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
247 return false;
248 }
249
250 // Try to prove that there is aliasing, or that there is no aliasing. Either
251 // way, we can return now. If nothing can be proved, proceed with more tests.
252 bool IsAlias;
254 return IsAlias;
255
256 // The following all rely on MMO0 and MMO1 being valid.
257 if (!MUC0.MMO || !MUC1.MMO)
258 return true;
259
260 // FIXME: port the alignment based alias analysis from SDAG's isAlias().
261 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
262 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
263 uint64_t Size0 = MUC0.NumBytes;
264 uint64_t Size1 = MUC1.NumBytes;
// Fall back to IR-level alias analysis using the MMOs' underlying IR values
// and AA metadata, widening each location to cover both offsets.
265 if (AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
268 // Use alias analysis information.
269 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
270 int64_t Overlap0 = Size0 + SrcValOffset0 - MinOffset;
271 int64_t Overlap1 = Size1 + SrcValOffset1 - MinOffset;
272 if (AA->isNoAlias(MemoryLocation(MUC0.MMO->getValue(), Overlap0,
273 MUC0.MMO->getAAInfo()),
274 MemoryLocation(MUC1.MMO->getValue(), Overlap1,
275 MUC1.MMO->getAAInfo())))
276 return false;
277 }
278
279 // Otherwise we have to assume they alias.
280 return true;
281}
282
283/// Returns true if the instruction creates an unavoidable hazard that
284/// forces a boundary between store merge candidates.
// NOTE(review): the 'static bool isInstHardMergeHazard(MachineInstr &MI)'
// signature (original line 285) is elided in this excerpt. Unmodeled side
// effects and ordered (volatile/atomic) memory references must never be
// reordered across, so they terminate any in-progress candidate.
286 return MI.hasUnmodeledSideEffects() || MI.hasOrderedMemoryRef();
287}
288
// Greedily merge a run of same-type adjacent stores: repeatedly find the
// widest legal power-of-two-sized store covering a prefix of the vector,
// merge that prefix, and continue with the remainder. Returns true if any
// merge happened.
// NOTE(review): original line 315 (the call computing StoreEVT from StoreTy,
// per getApproximateEVTForLLT's declaration elsewhere) is elided here.
289bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) {
290 // Try to merge all the stores in the vector, splitting into separate segments
291 // as necessary.
292 assert(StoresToMerge.size() > 1 && "Expected multiple stores to merge");
293 LLT OrigTy = MRI->getType(StoresToMerge[0]->getValueReg());
294 LLT PtrTy = MRI->getType(StoresToMerge[0]->getPointerReg());
295 unsigned AS = PtrTy.getAddressSpace();
296 // Ensure the legal store info is computed for this address space.
297 initializeStoreMergeTargetInfo(AS);
298 const auto &LegalSizes = LegalStoreSizes[AS];
299
300#ifndef NDEBUG
// All candidates must store the same value type; guaranteed by
// addStoreToCandidate(), re-checked here in debug builds.
301 for (auto *StoreMI : StoresToMerge)
302 assert(MRI->getType(StoreMI->getValueReg()) == OrigTy);
303#endif
304
305 const auto &DL = MF->getFunction().getParent()->getDataLayout();
306 bool AnyMerged = false;
307 do {
// Only a power-of-two number of stores can form a scalar of a natural
// width, so round the candidate count down.
308 unsigned NumPow2 = llvm::bit_floor(StoresToMerge.size());
309 unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedValue();
310 // Compute the biggest store we can generate to handle the number of stores.
311 unsigned MergeSizeBits;
312 for (MergeSizeBits = MaxSizeBits; MergeSizeBits > 1; MergeSizeBits /= 2) {
313 LLT StoreTy = LLT::scalar(MergeSizeBits);
314 EVT StoreEVT =
// A width is usable only if the legalizer accepts it for this address
// space AND the target lowering agrees it is profitable/legal.
316 if (LegalSizes.size() > MergeSizeBits && LegalSizes[MergeSizeBits] &&
317 TLI->canMergeStoresTo(AS, StoreEVT, *MF) &&
318 (TLI->isTypeLegal(StoreEVT)))
319 break; // We can generate a MergeSize bits store.
320 }
// If the best width is no wider than one original store, nothing further
// can be merged.
321 if (MergeSizeBits <= OrigTy.getSizeInBits())
322 return AnyMerged; // No greater merge.
323
324 unsigned NumStoresToMerge = MergeSizeBits / OrigTy.getSizeInBits();
325 // Perform the actual merging.
326 SmallVector<GStore *, 8> SingleMergeStores(
327 StoresToMerge.begin(), StoresToMerge.begin() + NumStoresToMerge);
328 AnyMerged |= doSingleStoreMerge(SingleMergeStores);
329 StoresToMerge.erase(StoresToMerge.begin(),
330 StoresToMerge.begin() + NumStoresToMerge);
331 } while (StoresToMerge.size() > 1);
332 return AnyMerged;
333}
334
335bool LoadStoreOpt::isLegalOrBeforeLegalizer(const LegalityQuery &Query,
336 MachineFunction &MF) const {
337 auto Action = LI->getAction(Query).Action;
338 // If the instruction is unsupported, it can't be legalized at all.
339 if (Action == LegalizeActions::Unsupported)
340 return false;
341 return IsPreLegalizer || Action == LegalizeAction::Legal;
342}
343
// Replace one run of adjacent same-type stores with a single wide store.
// Currently only handles the all-constant-values case: the small constants
// are packed into one wide APInt and stored in a single G_STORE. Returns
// true if the merge was performed (the originals are then queued for
// erasure).
// NOTE(review): original lines 408 and 410 (construction of the
// MachineOptimizationRemarkEmitter 'MORE' and of the remark 'R') are elided
// in this excerpt.
344bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) {
345 assert(Stores.size() > 1);
346 // We know that all the stores are consecutive and there are no aliasing
347 // operations in the range. However, the values that are being stored may be
348 // generated anywhere before each store. To ensure we have the values
349 // available, we materialize the wide value and new store at the place of the
350 // final store in the merge sequence.
351 GStore *FirstStore = Stores[0];
352 const unsigned NumStores = Stores.size();
353 LLT SmallTy = MRI->getType(FirstStore->getValueReg());
354 LLT WideValueTy =
355 LLT::scalar(NumStores * SmallTy.getSizeInBits().getFixedValue());
356
357 // For each store, compute pairwise merged debug locs.
358 DebugLoc MergedLoc = Stores.front()->getDebugLoc();
359 for (auto *Store : drop_begin(Stores))
360 MergedLoc = DILocation::getMergedLocation(MergedLoc, Store->getDebugLoc());
361
// Insert at the last store in the sequence so every stored value already
// dominates the insertion point.
362 Builder.setInstr(*Stores.back());
363 Builder.setDebugLoc(MergedLoc);
364
365 // If all of the store values are constants, then create a wide constant
366 // directly. Otherwise, we need to generate some instructions to merge the
367 // existing values together into a wider type.
368 SmallVector<APInt, 8> ConstantVals;
369 for (auto *Store : Stores) {
370 auto MaybeCst =
371 getIConstantVRegValWithLookThrough(Store->getValueReg(), *MRI);
// One non-constant value disqualifies the whole group (see bail-out below).
372 if (!MaybeCst) {
373 ConstantVals.clear();
374 break;
375 }
376 ConstantVals.emplace_back(MaybeCst->Value);
377 }
378
379 Register WideReg;
// The wide MMO is derived from the first (lowest-address) store's MMO with
// offset 0 and the widened type.
380 auto *WideMMO =
381 MF->getMachineMemOperand(&FirstStore->getMMO(), 0, WideValueTy);
382 if (ConstantVals.empty()) {
383 // Mimic the SDAG behaviour here and don't try to do anything for unknown
384 // values. In future, we should also support the cases of loads and
385 // extracted vector elements.
386 return false;
387 }
388
389 assert(ConstantVals.size() == NumStores);
390 // Check if our wide constant is legal.
391 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {WideValueTy}}, *MF))
392 return false;
393 APInt WideConst(WideValueTy.getSizeInBits(), 0);
// NOTE(review): constants are packed at increasing bit positions in store
// order, i.e. lowest address in the lowest bits — little-endian layout;
// confirm against any big-endian handling elsewhere in the pass.
394 for (unsigned Idx = 0; Idx < ConstantVals.size(); ++Idx) {
395 // Insert the smaller constant into the corresponding position in the
396 // wider one.
397 WideConst.insertBits(ConstantVals[Idx], Idx * SmallTy.getSizeInBits());
398 }
399 WideReg = Builder.buildConstant(WideValueTy, WideConst).getReg(0);
400 auto NewStore =
401 Builder.buildStore(WideReg, FirstStore->getPointerReg(), *WideMMO);
// Only referenced inside LLVM_DEBUG; silence -Wunused in release builds.
402 (void) NewStore;
403 LLVM_DEBUG(dbgs() << "Merged " << Stores.size()
404 << " stores into merged store: " << *NewStore);
405 LLVM_DEBUG(for (auto *MI : Stores) dbgs() << " " << *MI;);
406 NumStoresMerged += Stores.size();
407
// Emit an optimization remark describing the merge.
409 MORE.emit([&]() {
411 FirstStore->getDebugLoc(),
412 FirstStore->getParent());
413 R << "Merged " << NV("NumMerged", Stores.size()) << " stores of "
414 << NV("OrigWidth", SmallTy.getSizeInBytes())
415 << " bytes into a single store of "
416 << NV("NewWidth", WideValueTy.getSizeInBytes()) << " bytes";
417 return R;
418 });
419
// Deletion is deferred: mergeBlockStores() erases after iteration finishes.
420 for (auto *MI : Stores)
421 InstsToErase.insert(MI);
422 return true;
423}
424
// Finalize a candidate group: re-run alias checks that were deferred while
// the group was being built, then hand the surviving stores to
// mergeStores(). Always resets the candidate. Returns true if any stores
// were actually merged.
425bool LoadStoreOpt::processMergeCandidate(StoreMergeCandidate &C) {
// A single store can't be merged with anything.
426 if (C.Stores.size() < 2) {
427 C.reset();
428 return false;
429 }
430
431 LLVM_DEBUG(dbgs() << "Checking store merge candidate with " << C.Stores.size()
432 << " stores, starting with " << *C.Stores[0]);
433 // We know that the stores in the candidate are adjacent.
434 // Now we need to check if any potential aliasing instructions recorded
435 // during the search alias with load/stores added to the candidate after.
436 // For example, if we have the candidate:
437 // C.Stores = [ST1, ST2, ST3, ST4]
438 // and after seeing ST2 we saw a load LD1, which did not alias with ST1 or
439 // ST2, then we would have recorded it into the PotentialAliases structure
440 // with the associated index value of "1". Then we see ST3 and ST4 and add
441 // them to the candidate group. We know that LD1 does not alias with ST1 or
442 // ST2, since we already did that check. However we don't yet know if it
443 // may alias ST3 and ST4, so we perform those checks now.
444 SmallVector<GStore *> StoresToMerge;
445
// Returns true if CheckStore (at position Idx in C.Stores) may alias any
// recorded operation that was seen after it was added. PotentialAliases is
// scanned newest-first; once Idx is below an entry's pre-checked index,
// that entry and all older ones were already verified during the search.
446 auto DoesStoreAliasWithPotential = [&](unsigned Idx, GStore &CheckStore) {
447 for (auto AliasInfo : reverse(C.PotentialAliases)) {
448 MachineInstr *PotentialAliasOp = AliasInfo.first;
449 unsigned PreCheckedIdx = AliasInfo.second;
450 if (static_cast<unsigned>(Idx) < PreCheckedIdx) {
451 // Once our store index is lower than the index associated with the
452 // potential alias, we know that we've already checked for this alias
453 // and all of the earlier potential aliases too.
454 return false;
455 }
456 // Need to check this alias.
457 if (GISelAddressing::instMayAlias(CheckStore, *PotentialAliasOp, *MRI,
458 AA)) {
459 LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp
460 << " detected\n");
461 return true;
462 }
463 }
464 return false;
465 };
466 // Start from the last store in the group, and check if it aliases with any
467 // of the potential aliasing operations in the list.
// C.Stores was filled top-down in descending address order, so iterating it
// in reverse yields StoresToMerge in ascending address order, as
// mergeStores()/doSingleStoreMerge() expect (element 0 = lowest address).
// NOTE(review): skipping an aliasing store in the middle would leave a
// non-contiguous set in StoresToMerge — presumably the recorded indices
// make that impossible in practice; confirm.
468 for (int StoreIdx = C.Stores.size() - 1; StoreIdx >= 0; --StoreIdx) {
469 auto *CheckStore = C.Stores[StoreIdx];
470 if (DoesStoreAliasWithPotential(StoreIdx, *CheckStore))
471 continue;
472 StoresToMerge.emplace_back(CheckStore);
473 }
474
475 LLVM_DEBUG(dbgs() << StoresToMerge.size()
476 << " stores remaining after alias checks. Merging...\n");
477
478 // Now we've checked for aliasing hazards, merge any stores left.
479 C.reset();
480 if (StoresToMerge.size() < 2)
481 return false;
482 return mergeStores(StoresToMerge);
483}
484
485bool LoadStoreOpt::operationAliasesWithCandidate(MachineInstr &MI,
486 StoreMergeCandidate &C) {
487 if (C.Stores.empty())
488 return false;
489 return llvm::any_of(C.Stores, [&](MachineInstr *OtherMI) {
490 return instMayAlias(MI, *OtherMI, *MRI, AA);
491 });
492}
493
494void LoadStoreOpt::StoreMergeCandidate::addPotentialAlias(MachineInstr &MI) {
495 PotentialAliases.emplace_back(std::make_pair(&MI, Stores.size() - 1));
496}
497
// Try to add StoreMI to candidate C. A store is accepted either as the
// first member, or if it writes the same-sized scalar to the address
// immediately below the candidate's current lowest store (we scan the block
// bottom-up, so addresses decrease as stores are added). Returns true if
// the store was added.
498bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
499 StoreMergeCandidate &C) {
500 // Check if the given store writes to an adjacent address, and other
501 // requirements.
502 LLT ValueTy = MRI->getType(StoreMI.getValueReg());
503 LLT PtrTy = MRI->getType(StoreMI.getPointerReg());
504
505 // Only handle scalars.
506 if (!ValueTy.isScalar())
507 return false;
508
509 // Don't allow truncating stores for now.
510 if (StoreMI.getMemSizeInBits() != ValueTy.getSizeInBits())
511 return false;
512
513 // Avoid adding volatile or ordered stores to the candidate. We already have a
514 // check for this in instMayAlias() but that only get's called later between
515 // potential aliasing hazards.
516 if (!StoreMI.isSimple())
517 return false;
518
519 Register StoreAddr = StoreMI.getPointerReg();
520 auto BIO = getPointerInfo(StoreAddr, *MRI);
521 Register StoreBase = BIO.BaseReg;
// NOTE(review): BIO.Offset is signed (set via getSExtValue() in
// getPointerInfo); storing it in uint64_t wraps negative offsets to huge
// values. The adjacency check below is wrap-consistent, but the
// lowest-offset guard for the first store changes meaning for negative
// offsets — confirm intended.
522 uint64_t StoreOffCst = BIO.Offset;
523 if (C.Stores.empty()) {
524 // This is the first store of the candidate.
525 // If the offset can't possibly allow for a lower addressed store with the
526 // same base, don't bother adding it.
527 if (StoreOffCst < ValueTy.getSizeInBytes())
528 return false;
529 C.BasePtr = StoreBase;
530 C.CurrentLowestOffset = StoreOffCst;
531 C.Stores.emplace_back(&StoreMI);
532 LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: "
533 << StoreMI);
534 return true;
535 }
536
537 // Check the store is the same size as the existing ones in the candidate.
538 if (MRI->getType(C.Stores[0]->getValueReg()).getSizeInBits() !=
539 ValueTy.getSizeInBits())
540 return false;
541
// Mixed address spaces can have different pointer widths/semantics; keep
// candidates within a single address space.
542 if (MRI->getType(C.Stores[0]->getPointerReg()).getAddressSpace() !=
543 PtrTy.getAddressSpace())
544 return false;
545
546 // There are other stores in the candidate. Check that the store address
547 // writes to the next lowest adjacent address.
548 if (C.BasePtr != StoreBase)
549 return false;
550 if ((C.CurrentLowestOffset - ValueTy.getSizeInBytes()) !=
551 static_cast<uint64_t>(StoreOffCst))
552 return false;
553
554 // This writes to an adjacent address. Allow it.
555 C.Stores.emplace_back(&StoreMI);
556 C.CurrentLowestOffset = C.CurrentLowestOffset - ValueTy.getSizeInBytes();
557 LLVM_DEBUG(dbgs() << "Candidate added store: " << StoreMI);
558 return true;
559}
560
// Scan one basic block bottom-up, growing a StoreMergeCandidate from
// adjacent G_STOREs and flushing it whenever an aliasing operation or hard
// hazard intervenes. Returns true if any stores in the block were merged.
// NOTE(review): original line 589 (the condition guarding the hard-hazard
// flush, per isInstHardMergeHazard's declaration) is elided in this excerpt.
561bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) {
562 bool Changed = false;
563 // Walk through the block bottom-up, looking for merging candidates.
564 StoreMergeCandidate Candidate;
565 for (MachineInstr &MI : llvm::reverse(MBB)) {
// Skip instructions already consumed by an earlier merge in this block.
566 if (InstsToErase.contains(&MI))
567 continue;
568
569 if (auto *StoreMI = dyn_cast<GStore>(&MI)) {
570 // We have a G_STORE. Add it to the candidate if it writes to an adjacent
571 // address.
572 if (!addStoreToCandidate(*StoreMI, Candidate)) {
573 // Store wasn't eligible to be added. May need to record it as a
574 // potential alias.
575 if (operationAliasesWithCandidate(*StoreMI, Candidate)) {
576 Changed |= processMergeCandidate(Candidate);
577 continue;
578 }
579 Candidate.addPotentialAlias(*StoreMI);
580 }
581 continue;
582 }
583
584 // If we don't have any stores yet, this instruction can't pose a problem.
585 if (Candidate.Stores.empty())
586 continue;
587
588 // We're dealing with some other kind of instruction.
// Hard hazards (side effects / ordered memory refs) end the candidate
// unconditionally.
590 Changed |= processMergeCandidate(Candidate);
591 Candidate.Stores.clear();
592 continue;
593 }
594
// Instructions that don't touch memory can always be reordered past.
595 if (!MI.mayLoadOrStore())
596 continue;
597
598 if (operationAliasesWithCandidate(MI, Candidate)) {
599 // We have a potential alias, so process the current candidate if we can
600 // and then continue looking for a new candidate.
601 Changed |= processMergeCandidate(Candidate);
602 continue;
603 }
604
605 // Record this instruction as a potential alias for future stores that are
606 // added to the candidate.
607 Candidate.addPotentialAlias(MI);
608 }
609
610 // Process any candidate left after finishing searching the entire block.
611 Changed |= processMergeCandidate(Candidate);
612
613 // Erase instructions now that we're no longer iterating over the block.
614 for (auto *MI : InstsToErase)
615 MI->eraseFromParent();
616 InstsToErase.clear();
617 return Changed;
618}
619
620bool LoadStoreOpt::mergeFunctionStores(MachineFunction &MF) {
621 bool Changed = false;
622 for (auto &BB : MF) {
623 Changed |= mergeBlockStores(BB);
624 }
625 return Changed;
626}
627
// Populate (once per address space) the LegalStoreSizes bitvector: bit N is
// set if a scalar store of N bits is directly legal for this target in this
// address space. mergeStores() consults this to pick merge widths.
// NOTE(review): original lines 649-650 (building the MemDescrs for the
// LegalityQuery) are elided in this excerpt.
628void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) {
629 // Query the legalizer info to record what store types are legal.
630 // We record this because we don't want to bother trying to merge stores into
631 // illegal ones, which would just result in being split again.
632
633 if (LegalStoreSizes.count(AddrSpace)) {
634 assert(LegalStoreSizes[AddrSpace].any());
635 return; // Already cached sizes for this address space.
636 }
637
638 // Need to reserve at least MaxStoreSizeToForm + 1 bits.
639 BitVector LegalSizes(MaxStoreSizeToForm * 2);
640 const auto &LI = *MF->getSubtarget().getLegalizerInfo();
641 const auto &DL = MF->getFunction().getParent()->getDataLayout();
// Model the store's pointer operand with the address space's integer
// pointer type.
642 Type *IntPtrIRTy =
643 DL.getIntPtrType(MF->getFunction().getContext(), AddrSpace);
644 LLT PtrTy = getLLTForType(*IntPtrIRTy->getPointerTo(AddrSpace), DL);
645 // We assume that we're not going to be generating any stores wider than
646 // MaxStoreSizeToForm bits for now.
// Probe each power-of-two width and record the directly-Legal ones.
647 for (unsigned Size = 2; Size <= MaxStoreSizeToForm; Size *= 2) {
648 LLT Ty = LLT::scalar(Size);
651 SmallVector<LLT> StoreTys({Ty, PtrTy});
652 LegalityQuery Q(TargetOpcode::G_STORE, StoreTys, MemDescrs);
653 LegalizeActionStep ActionStep = LI.getAction(Q);
654 if (ActionStep.Action == LegalizeActions::Legal)
655 LegalSizes.set(Size);
656 }
657 assert(LegalSizes.any() && "Expected some store sizes to be legal!");
658 LegalStoreSizes[AddrSpace] = LegalSizes;
659}
660
// Pass entry point: skip functions where instruction selection already
// failed, otherwise initialize per-function state and run store merging.
// NOTE(review): original lines 661 (the runOnMachineFunction signature) and
// 664 (the FailedISel-style property enum argument) are elided in this
// excerpt.
662 // If the ISel pipeline failed, do not bother running that pass.
663 if (MF.getProperties().hasProperty(
665 return false;
666
667 LLVM_DEBUG(dbgs() << "Begin memory optimizations for: " << MF.getName()
668 << '\n');
669
670 init(MF);
671 bool Changed = false;
672 Changed |= mergeFunctionStores(MF);
673
// Drop the per-address-space legality cache; it is per-target/function run.
674 LegalStoreSizes.clear();
675 return Changed;
676}
unsigned const MachineRegisterInfo * MRI
@ Generic
aarch64 promote const
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Atomic ordering constants.
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Size
#define DEBUG_TYPE
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
IRTranslator LLVM IR MI
Interface for Targets to specify which operations they can successfully select and how the others sho...
Generic memory optimizations
const unsigned MaxStoreSizeToForm
#define DEBUG_TYPE
static bool isInstHardMergeHazard(MachineInstr &MI)
Returns true if the instruction creates an unavoidable hazard that forces a boundary between store me...
Implement a low-level type suitable for MachineInstr level instruction selection.
#define F(x, y, z)
Definition: MD5.cpp:55
Contains matchers for matching SSA Machine Instructions.
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*----===//
print Print MemDeps of function
This file provides utility analysis objects describing memory locations.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file describes how to lower LLVM code to machine code.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are no-alias.
Class for arbitrary precision integers.
Definition: APInt.h:75
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
static const DILocation * getMergedLocation(const DILocation *LocA, const DILocation *LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
A debug info location.
Definition: DebugLoc.h:33
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:319
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
uint64_t getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:652
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr unsigned getAddressSpace() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
static char ID
Definition: LoadStoreOpt.h:64
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
bool hasProperty(Property P) const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
void setDebugLoc(const DebugLoc &DL)
Set the debug location to DL for all the next build instructions.
void setMF(MachineFunction &MF)
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
Definition: MachineInstr.h:68
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:313
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:445
A description of a memory reference used in the backend.
Diagnostic information for applied optimization remarks.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Representation for a specific memory location.
static uint64_t getSizeOrUnknown(const TypeSize &T)
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool isValid() const
Definition: Register.h:126
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Returns if it's reasonable to merge stores to MemVT size.
virtual const LegalizerInfo * getLegalizerInfo() const
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:182
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool aliasIsKnownForLoadStore(const MachineInstr &MI1, const MachineInstr &MI2, bool &IsAlias, MachineRegisterInfo &MRI)
Compute whether or not a memory access at MI1 aliases with an access at MI2.
BaseIndexOffset getPointerInfo(Register Ptr, MachineRegisterInfo &MRI)
Returns a BaseIndexOffset which describes the pointer in Ptr.
bool instMayAlias(const MachineInstr &MI, const MachineInstr &Other, MachineRegisterInfo &MRI, AliasAnalysis *AA)
Returns true if the instruction MI may alias Other.
Predicate any(Predicate P0, Predicate P1)
True iff P0 or P1 are true.
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:46
@ Unsupported
This operation is completely unsupported on the target.
Definition: LegalizerInfo.h:90
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
ManagedStatic< cl::opt< FnT >, OptCreatorT > Action
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:413
@ Offset
Definition: DWP.cpp:406
EVT getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, LLVMContext &Ctx)
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:461
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1826
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:511
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition: Utils.cpp:895
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:409
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:291
LLT getLLTForType(Type &Ty, const DataLayout &DL)
Construct a low-level type based on an LLVM type.
Definition: BitVector.h:858
#define MORE()
Definition: regcomp.c:252
Extended Value Type.
Definition: ValueTypes.h:34
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:36
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
The result of a query.
LegalizeAction Action
The action to take or the final answer.