LLVM  15.0.0git
LoadStoreOpt.cpp
Go to the documentation of this file.
1 //===- LoadStoreOpt.cpp ----------- Generic memory optimizations -*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the LoadStoreOpt optimization pass.
10 //===----------------------------------------------------------------------===//
11 
13 #include "llvm/ADT/Statistic.h"
28 #include "llvm/CodeGen/Register.h"
32 #include "llvm/InitializePasses.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/Debug.h"
38 #include <algorithm>
39 
40 #define DEBUG_TYPE "loadstore-opt"
41 
42 using namespace llvm;
43 using namespace ore;
44 using namespace MIPatternMatch;
45 
46 STATISTIC(NumStoresMerged, "Number of stores merged");
47 
48 const unsigned MaxStoreSizeToForm = 128;
49 
50 char LoadStoreOpt::ID = 0;
51 INITIALIZE_PASS_BEGIN(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
52  false, false)
55 
57  : MachineFunctionPass(ID), DoNotRunPass(F) {}
58 
60  : LoadStoreOpt([](const MachineFunction &) { return false; }) {}
61 
62 void LoadStoreOpt::init(MachineFunction &MF) {
63  this->MF = &MF;
64  MRI = &MF.getRegInfo();
65  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
66  TLI = MF.getSubtarget().getTargetLowering();
67  LI = MF.getSubtarget().getLegalizerInfo();
68  Builder.setMF(MF);
69  IsPreLegalizer = !MF.getProperties().hasProperty(
71  InstsToErase.clear();
72 }
73 
78 }
79 
83  Register PtrAddRHS;
84  if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(Info.BaseReg), m_Reg(PtrAddRHS)))) {
85  Info.BaseReg = Ptr;
86  Info.IndexReg = Register();
87  Info.IsIndexSignExt = false;
88  return Info;
89  }
90 
91  auto RHSCst = getIConstantVRegValWithLookThrough(PtrAddRHS, MRI);
92  if (RHSCst)
93  Info.Offset = RHSCst->Value.getSExtValue();
94 
95  // Just recognize a simple case for now. In future we'll need to match
96  // indexing patterns for base + index + constant.
97  Info.IndexReg = PtrAddRHS;
98  Info.IsIndexSignExt = false;
99  return Info;
100 }
101 
103  const MachineInstr &MI2,
104  bool &IsAlias,
106  auto *LdSt1 = dyn_cast<GLoadStore>(&MI1);
107  auto *LdSt2 = dyn_cast<GLoadStore>(&MI2);
108  if (!LdSt1 || !LdSt2)
109  return false;
110 
111  BaseIndexOffset BasePtr0 = getPointerInfo(LdSt1->getPointerReg(), MRI);
112  BaseIndexOffset BasePtr1 = getPointerInfo(LdSt2->getPointerReg(), MRI);
113 
114  if (!BasePtr0.BaseReg.isValid() || !BasePtr1.BaseReg.isValid())
115  return false;
116 
117  int64_t Size1 = LdSt1->getMemSize();
118  int64_t Size2 = LdSt2->getMemSize();
119 
120  int64_t PtrDiff;
121  if (BasePtr0.BaseReg == BasePtr1.BaseReg) {
122  PtrDiff = BasePtr1.Offset - BasePtr0.Offset;
123  // If the size of memory access is unknown, do not use it to do analysis.
124  // One example of unknown size memory access is to load/store scalable
125  // vector objects on the stack.
126  // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
127  // following situations arise:
128  if (PtrDiff >= 0 &&
129  Size1 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
130  // [----BasePtr0----]
131  // [---BasePtr1--]
132  // ========PtrDiff========>
133  IsAlias = !(Size1 <= PtrDiff);
134  return true;
135  }
136  if (PtrDiff < 0 &&
137  Size2 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
138  // [----BasePtr0----]
139  // [---BasePtr1--]
140  // =====(-PtrDiff)====>
141  IsAlias = !((PtrDiff + Size2) <= 0);
142  return true;
143  }
144  return false;
145  }
146 
147  // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
148  // able to calculate their relative offset if at least one arises
149  // from an alloca. However, these allocas cannot overlap and we
150  // can infer there is no alias.
151  auto *Base0Def = getDefIgnoringCopies(BasePtr0.BaseReg, MRI);
152  auto *Base1Def = getDefIgnoringCopies(BasePtr1.BaseReg, MRI);
153  if (!Base0Def || !Base1Def)
154  return false; // Couldn't tell anything.
155 
156 
157  if (Base0Def->getOpcode() != Base1Def->getOpcode())
158  return false;
159 
160  if (Base0Def->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
161  MachineFrameInfo &MFI = Base0Def->getMF()->getFrameInfo();
162  // If the bases have the same frame index but we couldn't find a
163  // constant offset, (indices are different) be conservative.
164  if (Base0Def != Base1Def &&
165  (!MFI.isFixedObjectIndex(Base0Def->getOperand(1).getIndex()) ||
166  !MFI.isFixedObjectIndex(Base1Def->getOperand(1).getIndex()))) {
167  IsAlias = false;
168  return true;
169  }
170  }
171 
172  // This implementation is a lot more primitive than the SDAG one for now.
173  // FIXME: what about constant pools?
174  if (Base0Def->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
175  auto GV0 = Base0Def->getOperand(1).getGlobal();
176  auto GV1 = Base1Def->getOperand(1).getGlobal();
177  if (GV0 != GV1) {
178  IsAlias = false;
179  return true;
180  }
181  }
182 
183  // Can't tell anything about aliasing.
184  return false;
185 }
186 
188  const MachineInstr &Other,
190  AliasAnalysis *AA) {
191  struct MemUseCharacteristics {
192  bool IsVolatile;
193  bool IsAtomic;
194  Register BasePtr;
195  int64_t Offset;
196  uint64_t NumBytes;
197  MachineMemOperand *MMO;
198  };
199 
200  auto getCharacteristics =
201  [&](const MachineInstr *MI) -> MemUseCharacteristics {
202  if (const auto *LS = dyn_cast<GLoadStore>(MI)) {
203  Register BaseReg;
204  int64_t Offset = 0;
205  // No pre/post-inc addressing modes are considered here, unlike in SDAG.
206  if (!mi_match(LS->getPointerReg(), MRI,
207  m_GPtrAdd(m_Reg(BaseReg), m_ICst(Offset)))) {
208  BaseReg = LS->getPointerReg();
209  Offset = 0;
210  }
211 
213  LS->getMMO().getMemoryType().getSizeInBytes());
214  return {LS->isVolatile(), LS->isAtomic(), BaseReg,
215  Offset /*base offset*/, Size, &LS->getMMO()};
216  }
217  // FIXME: support recognizing lifetime instructions.
218  // Default.
219  return {false /*isvolatile*/,
220  /*isAtomic*/ false, Register(),
221  (int64_t)0 /*offset*/, 0 /*size*/,
222  (MachineMemOperand *)nullptr};
223  };
224  MemUseCharacteristics MUC0 = getCharacteristics(&MI),
225  MUC1 = getCharacteristics(&Other);
226 
227  // If they are to the same address, then they must be aliases.
228  if (MUC0.BasePtr.isValid() && MUC0.BasePtr == MUC1.BasePtr &&
229  MUC0.Offset == MUC1.Offset)
230  return true;
231 
232  // If they are both volatile then they cannot be reordered.
233  if (MUC0.IsVolatile && MUC1.IsVolatile)
234  return true;
235 
236  // Be conservative about atomics for the moment
237  // TODO: This is way overconservative for unordered atomics (see D66309)
238  if (MUC0.IsAtomic && MUC1.IsAtomic)
239  return true;
240 
241  // If one operation reads from invariant memory, and the other may store, they
242  // cannot alias.
243  if (MUC0.MMO && MUC1.MMO) {
244  if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
245  (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
246  return false;
247  }
248 
249  // Try to prove that there is aliasing, or that there is no aliasing. Either
250  // way, we can return now. If nothing can be proved, proceed with more tests.
251  bool IsAlias;
253  return IsAlias;
254 
255  // The following all rely on MMO0 and MMO1 being valid.
256  if (!MUC0.MMO || !MUC1.MMO)
257  return true;
258 
259  // FIXME: port the alignment based alias analysis from SDAG's isAlias().
260  int64_t SrcValOffset0 = MUC0.MMO->getOffset();
261  int64_t SrcValOffset1 = MUC1.MMO->getOffset();
262  uint64_t Size0 = MUC0.NumBytes;
263  uint64_t Size1 = MUC1.NumBytes;
264  if (AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
265  Size0 != MemoryLocation::UnknownSize &&
266  Size1 != MemoryLocation::UnknownSize) {
267  // Use alias analysis information.
268  int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
269  int64_t Overlap0 = Size0 + SrcValOffset0 - MinOffset;
270  int64_t Overlap1 = Size1 + SrcValOffset1 - MinOffset;
271  if (AA->isNoAlias(MemoryLocation(MUC0.MMO->getValue(), Overlap0,
272  MUC0.MMO->getAAInfo()),
273  MemoryLocation(MUC1.MMO->getValue(), Overlap1,
274  MUC1.MMO->getAAInfo())))
275  return false;
276  }
277 
278  // Otherwise we have to assume they alias.
279  return true;
280 }
281 
282 /// Returns true if the instruction creates an unavoidable hazard that
283 /// forces a boundary between store merge candidates.
285  return MI.hasUnmodeledSideEffects() || MI.hasOrderedMemoryRef();
286 }
287 
288 bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) {
289  // Try to merge all the stores in the vector, splitting into separate segments
290  // as necessary.
291  assert(StoresToMerge.size() > 1 && "Expected multiple stores to merge");
292  LLT OrigTy = MRI->getType(StoresToMerge[0]->getValueReg());
293  LLT PtrTy = MRI->getType(StoresToMerge[0]->getPointerReg());
294  unsigned AS = PtrTy.getAddressSpace();
295  // Ensure the legal store info is computed for this address space.
296  initializeStoreMergeTargetInfo(AS);
297  const auto &LegalSizes = LegalStoreSizes[AS];
298 
299 #ifndef NDEBUG
300  for (auto StoreMI : StoresToMerge)
301  assert(MRI->getType(StoreMI->getValueReg()) == OrigTy);
302 #endif
303 
304  const auto &DL = MF->getFunction().getParent()->getDataLayout();
305  bool AnyMerged = false;
306  do {
307  unsigned NumPow2 = PowerOf2Floor(StoresToMerge.size());
308  unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedSize();
309  // Compute the biggest store we can generate to handle the number of stores.
310  unsigned MergeSizeBits;
311  for (MergeSizeBits = MaxSizeBits; MergeSizeBits > 1; MergeSizeBits /= 2) {
312  LLT StoreTy = LLT::scalar(MergeSizeBits);
313  EVT StoreEVT =
315  if (LegalSizes.size() > MergeSizeBits && LegalSizes[MergeSizeBits] &&
316  TLI->canMergeStoresTo(AS, StoreEVT, *MF) &&
317  (TLI->isTypeLegal(StoreEVT)))
318  break; // We can generate a MergeSize bits store.
319  }
320  if (MergeSizeBits <= OrigTy.getSizeInBits())
321  return AnyMerged; // No greater merge.
322 
323  unsigned NumStoresToMerge = MergeSizeBits / OrigTy.getSizeInBits();
324  // Perform the actual merging.
325  SmallVector<GStore *, 8> SingleMergeStores(
326  StoresToMerge.begin(), StoresToMerge.begin() + NumStoresToMerge);
327  AnyMerged |= doSingleStoreMerge(SingleMergeStores);
328  StoresToMerge.erase(StoresToMerge.begin(),
329  StoresToMerge.begin() + NumStoresToMerge);
330  } while (StoresToMerge.size() > 1);
331  return AnyMerged;
332 }
333 
334 bool LoadStoreOpt::isLegalOrBeforeLegalizer(const LegalityQuery &Query,
335  MachineFunction &MF) const {
336  auto Action = LI->getAction(Query).Action;
337  // If the instruction is unsupported, it can't be legalized at all.
338  if (Action == LegalizeActions::Unsupported)
339  return false;
340  return IsPreLegalizer || Action == LegalizeAction::Legal;
341 }
342 
343 bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) {
344  assert(Stores.size() > 1);
345  // We know that all the stores are consecutive and there are no aliasing
346  // operations in the range. However, the values that are being stored may be
347  // generated anywhere before each store. To ensure we have the values
348  // available, we materialize the wide value and new store at the place of the
349  // final store in the merge sequence.
350  GStore *FirstStore = Stores[0];
351  const unsigned NumStores = Stores.size();
352  LLT SmallTy = MRI->getType(FirstStore->getValueReg());
353  LLT WideValueTy =
354  LLT::scalar(NumStores * SmallTy.getSizeInBits().getFixedSize());
355 
356  // For each store, compute pairwise merged debug locs.
357  DebugLoc MergedLoc;
358  for (unsigned AIdx = 0, BIdx = 1; BIdx < NumStores; ++AIdx, ++BIdx)
359  MergedLoc = DILocation::getMergedLocation(Stores[AIdx]->getDebugLoc(),
360  Stores[BIdx]->getDebugLoc());
361  Builder.setInstr(*Stores.back());
362  Builder.setDebugLoc(MergedLoc);
363 
364  // If all of the store values are constants, then create a wide constant
365  // directly. Otherwise, we need to generate some instructions to merge the
366  // existing values together into a wider type.
367  SmallVector<APInt, 8> ConstantVals;
368  for (auto Store : Stores) {
369  auto MaybeCst =
370  getIConstantVRegValWithLookThrough(Store->getValueReg(), *MRI);
371  if (!MaybeCst) {
372  ConstantVals.clear();
373  break;
374  }
375  ConstantVals.emplace_back(MaybeCst->Value);
376  }
377 
378  Register WideReg;
379  auto *WideMMO =
380  MF->getMachineMemOperand(&FirstStore->getMMO(), 0, WideValueTy);
381  if (ConstantVals.empty()) {
382  // Mimic the SDAG behaviour here and don't try to do anything for unknown
383  // values. In future, we should also support the cases of loads and
384  // extracted vector elements.
385  return false;
386  }
387 
388  assert(ConstantVals.size() == NumStores);
389  // Check if our wide constant is legal.
390  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {WideValueTy}}, *MF))
391  return false;
392  APInt WideConst(WideValueTy.getSizeInBits(), 0);
393  for (unsigned Idx = 0; Idx < ConstantVals.size(); ++Idx) {
394  // Insert the smaller constant into the corresponding position in the
395  // wider one.
396  WideConst.insertBits(ConstantVals[Idx], Idx * SmallTy.getSizeInBits());
397  }
398  WideReg = Builder.buildConstant(WideValueTy, WideConst).getReg(0);
399  auto NewStore =
400  Builder.buildStore(WideReg, FirstStore->getPointerReg(), *WideMMO);
401  (void) NewStore;
402  LLVM_DEBUG(dbgs() << "Created merged store: " << *NewStore);
403  NumStoresMerged += Stores.size();
404 
406  MORE.emit([&]() {
407  MachineOptimizationRemark R(DEBUG_TYPE, "MergedStore",
408  FirstStore->getDebugLoc(),
409  FirstStore->getParent());
410  R << "Merged " << NV("NumMerged", Stores.size()) << " stores of "
411  << NV("OrigWidth", SmallTy.getSizeInBytes())
412  << " bytes into a single store of "
413  << NV("NewWidth", WideValueTy.getSizeInBytes()) << " bytes";
414  return R;
415  });
416 
417  for (auto MI : Stores)
418  InstsToErase.insert(MI);
419  return true;
420 }
421 
422 bool LoadStoreOpt::processMergeCandidate(StoreMergeCandidate &C) {
423  if (C.Stores.size() < 2) {
424  C.reset();
425  return false;
426  }
427 
428  LLVM_DEBUG(dbgs() << "Checking store merge candidate with " << C.Stores.size()
429  << " stores, starting with " << *C.Stores[0]);
430  // We know that the stores in the candidate are adjacent.
431  // Now we need to check if any potential aliasing instructions recorded
432  // during the search alias with load/stores added to the candidate after.
433  // For example, if we have the candidate:
434  // C.Stores = [ST1, ST2, ST3, ST4]
435  // and after seeing ST2 we saw a load LD1, which did not alias with ST1 or
436  // ST2, then we would have recorded it into the PotentialAliases structure
437  // with the associated index value of "1". Then we see ST3 and ST4 and add
438  // them to the candidate group. We know that LD1 does not alias with ST1 or
439  // ST2, since we already did that check. However we don't yet know if it
440  // may alias ST3 and ST4, so we perform those checks now.
441  SmallVector<GStore *> StoresToMerge;
442 
443  auto DoesStoreAliasWithPotential = [&](unsigned Idx, GStore &CheckStore) {
444  for (auto AliasInfo : reverse(C.PotentialAliases)) {
445  MachineInstr *PotentialAliasOp = AliasInfo.first;
446  unsigned PreCheckedIdx = AliasInfo.second;
447  if (static_cast<unsigned>(Idx) > PreCheckedIdx) {
448  // Need to check this alias.
449  if (GISelAddressing::instMayAlias(CheckStore, *PotentialAliasOp, *MRI,
450  AA)) {
451  LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp
452  << " detected\n");
453  return true;
454  }
455  } else {
456  // Once our store index is lower than the index associated with the
457  // potential alias, we know that we've already checked for this alias
458  // and all of the earlier potential aliases too.
459  return false;
460  }
461  }
462  return false;
463  };
464  // Start from the last store in the group, and check if it aliases with any
465  // of the potential aliasing operations in the list.
466  for (int StoreIdx = C.Stores.size() - 1; StoreIdx >= 0; --StoreIdx) {
467  auto *CheckStore = C.Stores[StoreIdx];
468  if (DoesStoreAliasWithPotential(StoreIdx, *CheckStore))
469  continue;
470  StoresToMerge.emplace_back(CheckStore);
471  }
472 
473  LLVM_DEBUG(dbgs() << StoresToMerge.size()
474  << " stores remaining after alias checks. Merging...\n");
475 
476  // Now we've checked for aliasing hazards, merge any stores left.
477  C.reset();
478  if (StoresToMerge.size() < 2)
479  return false;
480  return mergeStores(StoresToMerge);
481 }
482 
483 bool LoadStoreOpt::operationAliasesWithCandidate(MachineInstr &MI,
484  StoreMergeCandidate &C) {
485  if (C.Stores.empty())
486  return false;
487  return llvm::any_of(C.Stores, [&](MachineInstr *OtherMI) {
488  return instMayAlias(MI, *OtherMI, *MRI, AA);
489  });
490 }
491 
492 void LoadStoreOpt::StoreMergeCandidate::addPotentialAlias(MachineInstr &MI) {
493  PotentialAliases.emplace_back(std::make_pair(&MI, Stores.size() - 1));
494 }
495 
496 bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
497  StoreMergeCandidate &C) {
498  // Check if the given store writes to an adjacent address, and other
499  // requirements.
500  LLT ValueTy = MRI->getType(StoreMI.getValueReg());
501  LLT PtrTy = MRI->getType(StoreMI.getPointerReg());
502 
503  // Only handle scalars.
504  if (!ValueTy.isScalar())
505  return false;
506 
507  // Don't allow truncating stores for now.
508  if (StoreMI.getMemSizeInBits() != ValueTy.getSizeInBits())
509  return false;
510 
511  // Avoid adding volatile or ordered stores to the candidate. We already have a
512  // check for this in instMayAlias() but that only get's called later between
513  // potential aliasing hazards.
514  if (!StoreMI.isSimple())
515  return false;
516 
517  Register StoreAddr = StoreMI.getPointerReg();
518  auto BIO = getPointerInfo(StoreAddr, *MRI);
519  Register StoreBase = BIO.BaseReg;
520  uint64_t StoreOffCst = BIO.Offset;
521  if (C.Stores.empty()) {
522  // This is the first store of the candidate.
523  // If the offset can't possibly allow for a lower addressed store with the
524  // same base, don't bother adding it.
525  if (StoreOffCst < ValueTy.getSizeInBytes())
526  return false;
527  C.BasePtr = StoreBase;
528  C.CurrentLowestOffset = StoreOffCst;
529  C.Stores.emplace_back(&StoreMI);
530  LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: "
531  << StoreMI);
532  return true;
533  }
534 
535  // Check the store is the same size as the existing ones in the candidate.
536  if (MRI->getType(C.Stores[0]->getValueReg()).getSizeInBits() !=
537  ValueTy.getSizeInBits())
538  return false;
539 
540  if (MRI->getType(C.Stores[0]->getPointerReg()).getAddressSpace() !=
541  PtrTy.getAddressSpace())
542  return false;
543 
544  // There are other stores in the candidate. Check that the store address
545  // writes to the next lowest adjacent address.
546  if (C.BasePtr != StoreBase)
547  return false;
548  if ((C.CurrentLowestOffset - ValueTy.getSizeInBytes()) !=
549  static_cast<uint64_t>(StoreOffCst))
550  return false;
551 
552  // This writes to an adjacent address. Allow it.
553  C.Stores.emplace_back(&StoreMI);
554  C.CurrentLowestOffset = C.CurrentLowestOffset - ValueTy.getSizeInBytes();
555  LLVM_DEBUG(dbgs() << "Candidate added store: " << StoreMI);
556  return true;
557 }
558 
559 bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) {
560  bool Changed = false;
561  // Walk through the block bottom-up, looking for merging candidates.
562  StoreMergeCandidate Candidate;
563  for (MachineInstr &MI : llvm::reverse(MBB)) {
564  if (InstsToErase.contains(&MI))
565  continue;
566 
567  if (auto *StoreMI = dyn_cast<GStore>(&MI)) {
568  // We have a G_STORE. Add it to the candidate if it writes to an adjacent
569  // address.
570  if (!addStoreToCandidate(*StoreMI, Candidate)) {
571  // Store wasn't eligible to be added. May need to record it as a
572  // potential alias.
573  if (operationAliasesWithCandidate(*StoreMI, Candidate)) {
574  Changed |= processMergeCandidate(Candidate);
575  continue;
576  }
577  Candidate.addPotentialAlias(*StoreMI);
578  }
579  continue;
580  }
581 
582  // If we don't have any stores yet, this instruction can't pose a problem.
583  if (Candidate.Stores.empty())
584  continue;
585 
586  // We're dealing with some other kind of instruction.
587  if (isInstHardMergeHazard(MI)) {
588  Changed |= processMergeCandidate(Candidate);
589  Candidate.Stores.clear();
590  continue;
591  }
592 
593  if (!MI.mayLoadOrStore())
594  continue;
595 
596  if (operationAliasesWithCandidate(MI, Candidate)) {
597  // We have a potential alias, so process the current candidate if we can
598  // and then continue looking for a new candidate.
599  Changed |= processMergeCandidate(Candidate);
600  continue;
601  }
602 
603  // Record this instruction as a potential alias for future stores that are
604  // added to the candidate.
605  Candidate.addPotentialAlias(MI);
606  }
607 
608  // Process any candidate left after finishing searching the entire block.
609  Changed |= processMergeCandidate(Candidate);
610 
611  // Erase instructions now that we're no longer iterating over the block.
612  for (auto *MI : InstsToErase)
613  MI->eraseFromParent();
614  InstsToErase.clear();
615  return Changed;
616 }
617 
618 bool LoadStoreOpt::mergeFunctionStores(MachineFunction &MF) {
619  bool Changed = false;
620  for (auto &BB : MF) {
621  Changed |= mergeBlockStores(BB);
622  }
623  return Changed;
624 }
625 
626 void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) {
627  // Query the legalizer info to record what store types are legal.
628  // We record this because we don't want to bother trying to merge stores into
629  // illegal ones, which would just result in being split again.
630 
631  if (LegalStoreSizes.count(AddrSpace)) {
632  assert(LegalStoreSizes[AddrSpace].any());
633  return; // Already cached sizes for this address space.
634  }
635 
636  // Need to reserve at least MaxStoreSizeToForm + 1 bits.
637  BitVector LegalSizes(MaxStoreSizeToForm * 2);
638  const auto &LI = *MF->getSubtarget().getLegalizerInfo();
639  const auto &DL = MF->getFunction().getParent()->getDataLayout();
640  Type *IntPtrIRTy =
641  DL.getIntPtrType(MF->getFunction().getContext(), AddrSpace);
642  LLT PtrTy = getLLTForType(*IntPtrIRTy->getPointerTo(AddrSpace), DL);
643  // We assume that we're not going to be generating any stores wider than
644  // MaxStoreSizeToForm bits for now.
645  for (unsigned Size = 2; Size <= MaxStoreSizeToForm; Size *= 2) {
646  LLT Ty = LLT::scalar(Size);
649  SmallVector<LLT> StoreTys({Ty, PtrTy});
650  LegalityQuery Q(TargetOpcode::G_STORE, StoreTys, MemDescrs);
651  LegalizeActionStep ActionStep = LI.getAction(Q);
652  if (ActionStep.Action == LegalizeActions::Legal)
653  LegalSizes.set(Size);
654  }
655  assert(LegalSizes.any() && "Expected some store sizes to be legal!");
656  LegalStoreSizes[AddrSpace] = LegalSizes;
657 }
658 
660  // If the ISel pipeline failed, do not bother running that pass.
661  if (MF.getProperties().hasProperty(
663  return false;
664 
665  LLVM_DEBUG(dbgs() << "Begin memory optimizations for: " << MF.getName()
666  << '\n');
667 
668  init(MF);
669  bool Changed = false;
670  Changed |= mergeFunctionStores(MF);
671 
672  LegalStoreSizes.clear();
673  return Changed;
674 }
llvm::GISelAddressing::BaseIndexOffset
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:36
llvm::Check::Size
@ Size
Definition: FileCheck.h:76
MIPatternMatch.h
llvm::MachineIRBuilder::setMF
void setMF(MachineFunction &MF)
Definition: MachineIRBuilder.cpp:24
llvm::MachineFunctionProperties::hasProperty
bool hasProperty(Property P) const
Definition: MachineFunction.h:176
LowLevelType.h
llvm::getDefIgnoringCopies
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:460
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:104
MachineInstr.h
MathExtras.h
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::MachineIRBuilder::setDebugLoc
void setDebugLoc(const DebugLoc &DL)
Set the debug location to DL for all the next build instructions.
Definition: MachineIRBuilder.h:362
llvm::DILocation::getMergedLocation
static const DILocation * getMergedLocation(const DILocation *LocA, const DILocation *LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
Definition: DebugInfoMetadata.cpp:101
AtomicOrdering.h
llvm::MIPatternMatch::m_Reg
operand_type_match m_Reg()
Definition: MIPatternMatch.h:252
DebugInfoMetadata.h
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
llvm::TypeSize::getFixedSize
ScalarTy getFixedSize() const
Definition: TypeSize.h:430
llvm::LegalizeActionStep::Action
LegalizeAction Action
The action to take or the final answer.
Definition: LegalizerInfo.h:144
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoadStoreOpt.cpp:40
llvm::LegalizeActionStep
The result of a query.
Definition: LegalizerInfo.h:142
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Statistic.h
llvm::MachineFunction::getMachineMemOperand
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Definition: MachineFunction.cpp:456
ErrorHandling.h
llvm::GStore::getValueReg
Register getValueReg() const
Get the stored value register.
Definition: GenericMachineInstrs.h:133
llvm::MachineOptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: MachineOptimizationRemarkEmitter.h:151
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
OptimizationRemarkEmitter.h
MachineBasicBlock.h
llvm::GISelAddressing::getPointerInfo
BaseIndexOffset getPointerInfo(Register Ptr, MachineRegisterInfo &MRI)
Returns a BaseIndexOffset which describes the pointer in Ptr.
Definition: LoadStoreOpt.cpp:80
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:319
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:380
llvm::GLoadStore::getMemSizeInBits
uint64_t getMemSizeInBits() const
Returns the size in bits of the memory access.
Definition: GenericMachineInstrs.h:63
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:126
GenericMachineInstrs.h
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::getSelectionDAGFallbackAnalysisUsage
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition: Utils.cpp:879
llvm::MachineIRBuilder::setInstr
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
Definition: MachineIRBuilder.h:340
LegalizerInfo.h
llvm::TargetLoweringBase::canMergeStoresTo
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Returns if it's reasonable to merge stores to MemVT size.
Definition: TargetLowering.h:616
llvm::MemoryLocation::getSizeOrUnknown
static uint64_t getSizeOrUnknown(const TypeSize &T)
Definition: MemoryLocation.h:284
llvm::LegalityPredicates::any
Predicate any(Predicate P0, Predicate P1)
True iff P0 or P1 are true.
Definition: LegalizerInfo.h:241
llvm::MIPatternMatch::m_GPtrAdd
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
Definition: MIPatternMatch.h:428
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:103
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::MachineIRBuilder::buildConstant
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Definition: MachineIRBuilder.cpp:283
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:240
MachineRegisterInfo.h
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations", false, false) INITIALIZE_PASS_END(LoadStoreOpt
AliasAnalysis.h
llvm::getLLTForType
LLT getLLTForType(Type &Ty, const DataLayout &DL)
Construct a low-level type based on an LLVM type.
Definition: LowLevelType.cpp:20
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::MachineOptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: MachineOptimizationRemarkEmitter.h:57
TargetLowering.h
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:650
llvm::PowerOf2Floor
uint64_t PowerOf2Floor(uint64_t A)
Returns the power of two which is less than or equal to the given value.
Definition: MathExtras.h:722
llvm::AAResults
Definition: AliasAnalysis.h:511
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::LLT::getSizeInBits
TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelTypeImpl.h:152
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
Utils.h
llvm::MachineFunction::getProperties
const MachineFunctionProperties & getProperties() const
Get the function properties.
Definition: MachineFunction.h:731
false
Definition: StackSlotColoring.cpp:141
llvm::GLoadStore::isSimple
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
Definition: GenericMachineInstrs.h:54
TargetOpcodes.h
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::LoadStoreOpt
Definition: LoadStoreOpt.h:62
llvm::BitVector
Definition: BitVector.h:75
llvm::MachineFrameInfo::isFixedObjectIndex
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
Definition: MachineFrameInfo.h:680
llvm::GLoadStore::getPointerReg
Register getPointerReg() const
Get the source register of the pointer value.
Definition: GenericMachineInstrs.h:44
llvm::LegalizeActions::Legal
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:46
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::MachineInstrBuilder::getReg
Register getReg(unsigned Idx) const
Get the register for the operand index.
Definition: MachineInstrBuilder.h:94
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
MachineOptimizationRemarkEmitter.h
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
llvm::GISelAddressing::BaseIndexOffset::BaseReg
Register BaseReg
Definition: LoadStoreOpt.h:37
llvm::LLT::getAddressSpace
unsigned getAddressSpace() const
Definition: LowLevelTypeImpl.h:238
llvm::GStore
Represents a G_STORE.
Definition: GenericMachineInstrs.h:130
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:640
llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:420
optimizations
Generic memory optimizations
Definition: LoadStoreOpt.cpp:53
llvm::IRSimilarity::Legal
@ Legal
Definition: IRSimilarityIdentifier.h:76
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
uint64_t
llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition: TargetLowering.h:934
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:577
const
aarch64 promote const
Definition: AArch64PromoteConstant.cpp:232
LoadStoreOpt.h
MemoryLocation.h
llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition: MachineFunction.cpp:567
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
isInstHardMergeHazard
static bool isInstHardMergeHazard(MachineInstr &MI)
Returns true if the instruction creates an unavoidable hazard that forces a boundary between store me...
Definition: LoadStoreOpt.cpp:284
llvm::LegalizerInfo::getAction
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
Definition: LegalizerInfo.cpp:319
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:82
llvm::LLT::isScalar
bool isScalar() const
Definition: LowLevelTypeImpl.h:118
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::MachineFunction
Definition: MachineFunction.h:241
getDebugLoc
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
Definition: MachineInstrBundle.cpp:110
llvm::LegalityQuery
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Definition: LegalizerInfo.h:108
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1614
llvm::getIConstantVRegValWithLookThrough
Optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:408
llvm::LoadStoreOpt::ID
static char ID
Definition: LoadStoreOpt.h:64
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::AMDGPU::HSAMD::Kernel::Arg::Key::IsVolatile
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
Definition: AMDGPUMetadata.h:199
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:288
llvm::getApproximateEVTForLLT
EVT getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, LLVMContext &Ctx)
Definition: LowLevelType.cpp:54
llvm::GISelAddressing::instMayAlias
bool instMayAlias(const MachineInstr &MI, const MachineInstr &Other, MachineRegisterInfo &MRI, AliasAnalysis *AA)
Returns true if the instruction MI may alias Other.
Definition: LoadStoreOpt.cpp:187
MORE
#define MORE()
Definition: regcomp.c:252
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::LoadStoreOpt::LoadStoreOpt
LoadStoreOpt()
Definition: LoadStoreOpt.cpp:59
llvm::LoadStoreOpt::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Definition: LoadStoreOpt.cpp:659
llvm::TargetSubtargetInfo::getLegalizerInfo
virtual const LegalizerInfo * getLegalizerInfo() const
Definition: TargetSubtargetInfo.h:121
llvm::MachineFunctionProperties::Property::FailedISel
@ FailedISel
std
Definition: BitVector.h:851
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:606
llvm::MIPatternMatch::m_ICst
ConstantMatch< APInt > m_ICst(APInt &Cst)
Definition: MIPatternMatch.h:90
MachineFrameInfo.h
llvm::GISelAddressing::BaseIndexOffset::Offset
int64_t Offset
Definition: LoadStoreOpt.h:39
llvm::LLT::getSizeInBytes
TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelTypeImpl.h:162
llvm::AArch64CC::LS
@ LS
Definition: AArch64BaseInfo.h:264
Casting.h
llvm::Type::getPointerTo
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:774
llvm::GISelAddressing::aliasIsKnownForLoadStore
bool aliasIsKnownForLoadStore(const MachineInstr &MI1, const MachineInstr &MI2, bool &IsAlias, MachineRegisterInfo &MRI)
Compute whether or not a memory access at MI1 aliases with an access at MI2.
Definition: LoadStoreOpt.cpp:102
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:591
AA
MaxStoreSizeToForm
const unsigned MaxStoreSizeToForm
Definition: LoadStoreOpt.cpp:48
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:105
llvm::MemoryLocation::UnknownSize
@ UnknownSize
Definition: MemoryLocation.h:215
llvm::TargetSubtargetInfo::getTargetLowering
virtual const TargetLowering * getTargetLowering() const
Definition: TargetSubtargetInfo.h:97
llvm::MachineRegisterInfo::getType
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Definition: MachineRegisterInfo.h:740
llvm::Register::isValid
bool isValid() const
Definition: Register.h:126
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1347
llvm::GLoadStore::getMMO
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
Definition: GenericMachineInstrs.h:47
llvm::LoadStoreOpt::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: LoadStoreOpt.cpp:74
llvm::LegalizeActions::Unsupported
@ Unsupported
This operation is completely unsupported on the target.
Definition: LegalizerInfo.h:90
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
Register.h
llvm::MIPatternMatch::mi_match
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Definition: MIPatternMatch.h:25
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::MachineIRBuilder::buildStore
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
Definition: MachineIRBuilder.cpp:415
MachineFunction.h
llvm::LLT::scalar
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelTypeImpl.h:42
InitializePasses.h
Debug.h
llvm::AtomicOrdering::NotAtomic
@ NotAtomic
llvm::MemoryLocation
Representation for a specific memory location.
Definition: MemoryLocation.h:210
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1225
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:927
llvm::MachineFunctionProperties::Property::Legalized
@ Legalized
llvm::LLT
Definition: LowLevelTypeImpl.h:39