LLVM  16.0.0git
LoadStoreOpt.cpp
Go to the documentation of this file.
1 //===- LoadStoreOpt.cpp ----------- Generic memory optimizations -*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the LoadStoreOpt optimization pass.
10 //===----------------------------------------------------------------------===//
11 
13 #include "llvm/ADT/Statistic.h"
28 #include "llvm/CodeGen/Register.h"
32 #include "llvm/InitializePasses.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/Debug.h"
38 #include <algorithm>
39 
40 #define DEBUG_TYPE "loadstore-opt"
41 
42 using namespace llvm;
43 using namespace ore;
44 using namespace MIPatternMatch;
45 
46 STATISTIC(NumStoresMerged, "Number of stores merged");
47 
48 const unsigned MaxStoreSizeToForm = 128;
49 
50 char LoadStoreOpt::ID = 0;
51 INITIALIZE_PASS_BEGIN(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
52  false, false)
55 
57  : MachineFunctionPass(ID), DoNotRunPass(F) {}
58 
60  : LoadStoreOpt([](const MachineFunction &) { return false; }) {}
61 
62 void LoadStoreOpt::init(MachineFunction &MF) {
63  this->MF = &MF;
64  MRI = &MF.getRegInfo();
65  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
66  TLI = MF.getSubtarget().getTargetLowering();
67  LI = MF.getSubtarget().getLegalizerInfo();
68  Builder.setMF(MF);
69  IsPreLegalizer = !MF.getProperties().hasProperty(
71  InstsToErase.clear();
72 }
73 
76  AU.setPreservesAll();
79 }
80 
84  Register PtrAddRHS;
85  if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(Info.BaseReg), m_Reg(PtrAddRHS)))) {
86  Info.BaseReg = Ptr;
87  Info.IndexReg = Register();
88  Info.IsIndexSignExt = false;
89  return Info;
90  }
91 
92  auto RHSCst = getIConstantVRegValWithLookThrough(PtrAddRHS, MRI);
93  if (RHSCst)
94  Info.Offset = RHSCst->Value.getSExtValue();
95 
96  // Just recognize a simple case for now. In future we'll need to match
97  // indexing patterns for base + index + constant.
98  Info.IndexReg = PtrAddRHS;
99  Info.IsIndexSignExt = false;
100  return Info;
101 }
102 
104  const MachineInstr &MI2,
105  bool &IsAlias,
107  auto *LdSt1 = dyn_cast<GLoadStore>(&MI1);
108  auto *LdSt2 = dyn_cast<GLoadStore>(&MI2);
109  if (!LdSt1 || !LdSt2)
110  return false;
111 
112  BaseIndexOffset BasePtr0 = getPointerInfo(LdSt1->getPointerReg(), MRI);
113  BaseIndexOffset BasePtr1 = getPointerInfo(LdSt2->getPointerReg(), MRI);
114 
115  if (!BasePtr0.BaseReg.isValid() || !BasePtr1.BaseReg.isValid())
116  return false;
117 
118  int64_t Size1 = LdSt1->getMemSize();
119  int64_t Size2 = LdSt2->getMemSize();
120 
121  int64_t PtrDiff;
122  if (BasePtr0.BaseReg == BasePtr1.BaseReg) {
123  PtrDiff = BasePtr1.Offset - BasePtr0.Offset;
124  // If the size of memory access is unknown, do not use it to do analysis.
125  // One example of unknown size memory access is to load/store scalable
126  // vector objects on the stack.
127  // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
128  // following situations arise:
129  if (PtrDiff >= 0 &&
130  Size1 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
131  // [----BasePtr0----]
132  // [---BasePtr1--]
133  // ========PtrDiff========>
134  IsAlias = !(Size1 <= PtrDiff);
135  return true;
136  }
137  if (PtrDiff < 0 &&
138  Size2 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
139  // [----BasePtr0----]
140  // [---BasePtr1--]
141  // =====(-PtrDiff)====>
142  IsAlias = !((PtrDiff + Size2) <= 0);
143  return true;
144  }
145  return false;
146  }
147 
148  // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
149  // able to calculate their relative offset if at least one arises
150  // from an alloca. However, these allocas cannot overlap and we
151  // can infer there is no alias.
152  auto *Base0Def = getDefIgnoringCopies(BasePtr0.BaseReg, MRI);
153  auto *Base1Def = getDefIgnoringCopies(BasePtr1.BaseReg, MRI);
154  if (!Base0Def || !Base1Def)
155  return false; // Couldn't tell anything.
156 
157 
158  if (Base0Def->getOpcode() != Base1Def->getOpcode())
159  return false;
160 
161  if (Base0Def->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
162  MachineFrameInfo &MFI = Base0Def->getMF()->getFrameInfo();
163  // If the bases have the same frame index but we couldn't find a
164  // constant offset, (indices are different) be conservative.
165  if (Base0Def != Base1Def &&
166  (!MFI.isFixedObjectIndex(Base0Def->getOperand(1).getIndex()) ||
167  !MFI.isFixedObjectIndex(Base1Def->getOperand(1).getIndex()))) {
168  IsAlias = false;
169  return true;
170  }
171  }
172 
173  // This implementation is a lot more primitive than the SDAG one for now.
174  // FIXME: what about constant pools?
175  if (Base0Def->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
176  auto GV0 = Base0Def->getOperand(1).getGlobal();
177  auto GV1 = Base1Def->getOperand(1).getGlobal();
178  if (GV0 != GV1) {
179  IsAlias = false;
180  return true;
181  }
182  }
183 
184  // Can't tell anything about aliasing.
185  return false;
186 }
187 
189  const MachineInstr &Other,
191  AliasAnalysis *AA) {
192  struct MemUseCharacteristics {
193  bool IsVolatile;
194  bool IsAtomic;
195  Register BasePtr;
196  int64_t Offset;
197  uint64_t NumBytes;
198  MachineMemOperand *MMO;
199  };
200 
201  auto getCharacteristics =
202  [&](const MachineInstr *MI) -> MemUseCharacteristics {
203  if (const auto *LS = dyn_cast<GLoadStore>(MI)) {
204  Register BaseReg;
205  int64_t Offset = 0;
206  // No pre/post-inc addressing modes are considered here, unlike in SDAG.
207  if (!mi_match(LS->getPointerReg(), MRI,
208  m_GPtrAdd(m_Reg(BaseReg), m_ICst(Offset)))) {
209  BaseReg = LS->getPointerReg();
210  Offset = 0;
211  }
212 
214  LS->getMMO().getMemoryType().getSizeInBytes());
215  return {LS->isVolatile(), LS->isAtomic(), BaseReg,
216  Offset /*base offset*/, Size, &LS->getMMO()};
217  }
218  // FIXME: support recognizing lifetime instructions.
219  // Default.
220  return {false /*isvolatile*/,
221  /*isAtomic*/ false, Register(),
222  (int64_t)0 /*offset*/, 0 /*size*/,
223  (MachineMemOperand *)nullptr};
224  };
225  MemUseCharacteristics MUC0 = getCharacteristics(&MI),
226  MUC1 = getCharacteristics(&Other);
227 
228  // If they are to the same address, then they must be aliases.
229  if (MUC0.BasePtr.isValid() && MUC0.BasePtr == MUC1.BasePtr &&
230  MUC0.Offset == MUC1.Offset)
231  return true;
232 
233  // If they are both volatile then they cannot be reordered.
234  if (MUC0.IsVolatile && MUC1.IsVolatile)
235  return true;
236 
237  // Be conservative about atomics for the moment
238  // TODO: This is way overconservative for unordered atomics (see D66309)
239  if (MUC0.IsAtomic && MUC1.IsAtomic)
240  return true;
241 
242  // If one operation reads from invariant memory, and the other may store, they
243  // cannot alias.
244  if (MUC0.MMO && MUC1.MMO) {
245  if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
246  (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
247  return false;
248  }
249 
250  // Try to prove that there is aliasing, or that there is no aliasing. Either
251  // way, we can return now. If nothing can be proved, proceed with more tests.
252  bool IsAlias;
254  return IsAlias;
255 
256  // The following all rely on MMO0 and MMO1 being valid.
257  if (!MUC0.MMO || !MUC1.MMO)
258  return true;
259 
260  // FIXME: port the alignment based alias analysis from SDAG's isAlias().
261  int64_t SrcValOffset0 = MUC0.MMO->getOffset();
262  int64_t SrcValOffset1 = MUC1.MMO->getOffset();
263  uint64_t Size0 = MUC0.NumBytes;
264  uint64_t Size1 = MUC1.NumBytes;
265  if (AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
266  Size0 != MemoryLocation::UnknownSize &&
267  Size1 != MemoryLocation::UnknownSize) {
268  // Use alias analysis information.
269  int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
270  int64_t Overlap0 = Size0 + SrcValOffset0 - MinOffset;
271  int64_t Overlap1 = Size1 + SrcValOffset1 - MinOffset;
272  if (AA->isNoAlias(MemoryLocation(MUC0.MMO->getValue(), Overlap0,
273  MUC0.MMO->getAAInfo()),
274  MemoryLocation(MUC1.MMO->getValue(), Overlap1,
275  MUC1.MMO->getAAInfo())))
276  return false;
277  }
278 
279  // Otherwise we have to assume they alias.
280  return true;
281 }
282 
283 /// Returns true if the instruction creates an unavoidable hazard that
284 /// forces a boundary between store merge candidates.
286  return MI.hasUnmodeledSideEffects() || MI.hasOrderedMemoryRef();
287 }
288 
289 bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) {
290  // Try to merge all the stores in the vector, splitting into separate segments
291  // as necessary.
292  assert(StoresToMerge.size() > 1 && "Expected multiple stores to merge");
293  LLT OrigTy = MRI->getType(StoresToMerge[0]->getValueReg());
294  LLT PtrTy = MRI->getType(StoresToMerge[0]->getPointerReg());
295  unsigned AS = PtrTy.getAddressSpace();
296  // Ensure the legal store info is computed for this address space.
297  initializeStoreMergeTargetInfo(AS);
298  const auto &LegalSizes = LegalStoreSizes[AS];
299 
300 #ifndef NDEBUG
301  for (auto *StoreMI : StoresToMerge)
302  assert(MRI->getType(StoreMI->getValueReg()) == OrigTy);
303 #endif
304 
305  const auto &DL = MF->getFunction().getParent()->getDataLayout();
306  bool AnyMerged = false;
307  do {
308  unsigned NumPow2 = PowerOf2Floor(StoresToMerge.size());
309  unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedSize();
310  // Compute the biggest store we can generate to handle the number of stores.
311  unsigned MergeSizeBits;
312  for (MergeSizeBits = MaxSizeBits; MergeSizeBits > 1; MergeSizeBits /= 2) {
313  LLT StoreTy = LLT::scalar(MergeSizeBits);
314  EVT StoreEVT =
316  if (LegalSizes.size() > MergeSizeBits && LegalSizes[MergeSizeBits] &&
317  TLI->canMergeStoresTo(AS, StoreEVT, *MF) &&
318  (TLI->isTypeLegal(StoreEVT)))
319  break; // We can generate a MergeSize bits store.
320  }
321  if (MergeSizeBits <= OrigTy.getSizeInBits())
322  return AnyMerged; // No greater merge.
323 
324  unsigned NumStoresToMerge = MergeSizeBits / OrigTy.getSizeInBits();
325  // Perform the actual merging.
326  SmallVector<GStore *, 8> SingleMergeStores(
327  StoresToMerge.begin(), StoresToMerge.begin() + NumStoresToMerge);
328  AnyMerged |= doSingleStoreMerge(SingleMergeStores);
329  StoresToMerge.erase(StoresToMerge.begin(),
330  StoresToMerge.begin() + NumStoresToMerge);
331  } while (StoresToMerge.size() > 1);
332  return AnyMerged;
333 }
334 
335 bool LoadStoreOpt::isLegalOrBeforeLegalizer(const LegalityQuery &Query,
336  MachineFunction &MF) const {
337  auto Action = LI->getAction(Query).Action;
338  // If the instruction is unsupported, it can't be legalized at all.
339  if (Action == LegalizeActions::Unsupported)
340  return false;
341  return IsPreLegalizer || Action == LegalizeAction::Legal;
342 }
343 
344 bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) {
345  assert(Stores.size() > 1);
346  // We know that all the stores are consecutive and there are no aliasing
347  // operations in the range. However, the values that are being stored may be
348  // generated anywhere before each store. To ensure we have the values
349  // available, we materialize the wide value and new store at the place of the
350  // final store in the merge sequence.
351  GStore *FirstStore = Stores[0];
352  const unsigned NumStores = Stores.size();
353  LLT SmallTy = MRI->getType(FirstStore->getValueReg());
354  LLT WideValueTy =
355  LLT::scalar(NumStores * SmallTy.getSizeInBits().getFixedSize());
356 
357  // For each store, compute pairwise merged debug locs.
358  DebugLoc MergedLoc = Stores.front()->getDebugLoc();
359  for (auto *Store : drop_begin(Stores))
360  MergedLoc = DILocation::getMergedLocation(MergedLoc, Store->getDebugLoc());
361 
362  Builder.setInstr(*Stores.back());
363  Builder.setDebugLoc(MergedLoc);
364 
365  // If all of the store values are constants, then create a wide constant
366  // directly. Otherwise, we need to generate some instructions to merge the
367  // existing values together into a wider type.
368  SmallVector<APInt, 8> ConstantVals;
369  for (auto *Store : Stores) {
370  auto MaybeCst =
371  getIConstantVRegValWithLookThrough(Store->getValueReg(), *MRI);
372  if (!MaybeCst) {
373  ConstantVals.clear();
374  break;
375  }
376  ConstantVals.emplace_back(MaybeCst->Value);
377  }
378 
379  Register WideReg;
380  auto *WideMMO =
381  MF->getMachineMemOperand(&FirstStore->getMMO(), 0, WideValueTy);
382  if (ConstantVals.empty()) {
383  // Mimic the SDAG behaviour here and don't try to do anything for unknown
384  // values. In future, we should also support the cases of loads and
385  // extracted vector elements.
386  return false;
387  }
388 
389  assert(ConstantVals.size() == NumStores);
390  // Check if our wide constant is legal.
391  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {WideValueTy}}, *MF))
392  return false;
393  APInt WideConst(WideValueTy.getSizeInBits(), 0);
394  for (unsigned Idx = 0; Idx < ConstantVals.size(); ++Idx) {
395  // Insert the smaller constant into the corresponding position in the
396  // wider one.
397  WideConst.insertBits(ConstantVals[Idx], Idx * SmallTy.getSizeInBits());
398  }
399  WideReg = Builder.buildConstant(WideValueTy, WideConst).getReg(0);
400  auto NewStore =
401  Builder.buildStore(WideReg, FirstStore->getPointerReg(), *WideMMO);
402  (void) NewStore;
403  LLVM_DEBUG(dbgs() << "Created merged store: " << *NewStore);
404  NumStoresMerged += Stores.size();
405 
407  MORE.emit([&]() {
408  MachineOptimizationRemark R(DEBUG_TYPE, "MergedStore",
409  FirstStore->getDebugLoc(),
410  FirstStore->getParent());
411  R << "Merged " << NV("NumMerged", Stores.size()) << " stores of "
412  << NV("OrigWidth", SmallTy.getSizeInBytes())
413  << " bytes into a single store of "
414  << NV("NewWidth", WideValueTy.getSizeInBytes()) << " bytes";
415  return R;
416  });
417 
418  for (auto *MI : Stores)
419  InstsToErase.insert(MI);
420  return true;
421 }
422 
423 bool LoadStoreOpt::processMergeCandidate(StoreMergeCandidate &C) {
424  if (C.Stores.size() < 2) {
425  C.reset();
426  return false;
427  }
428 
429  LLVM_DEBUG(dbgs() << "Checking store merge candidate with " << C.Stores.size()
430  << " stores, starting with " << *C.Stores[0]);
431  // We know that the stores in the candidate are adjacent.
432  // Now we need to check if any potential aliasing instructions recorded
433  // during the search alias with load/stores added to the candidate after.
434  // For example, if we have the candidate:
435  // C.Stores = [ST1, ST2, ST3, ST4]
436  // and after seeing ST2 we saw a load LD1, which did not alias with ST1 or
437  // ST2, then we would have recorded it into the PotentialAliases structure
438  // with the associated index value of "1". Then we see ST3 and ST4 and add
439  // them to the candidate group. We know that LD1 does not alias with ST1 or
440  // ST2, since we already did that check. However we don't yet know if it
441  // may alias ST3 and ST4, so we perform those checks now.
442  SmallVector<GStore *> StoresToMerge;
443 
444  auto DoesStoreAliasWithPotential = [&](unsigned Idx, GStore &CheckStore) {
445  for (auto AliasInfo : reverse(C.PotentialAliases)) {
446  MachineInstr *PotentialAliasOp = AliasInfo.first;
447  unsigned PreCheckedIdx = AliasInfo.second;
448  if (static_cast<unsigned>(Idx) > PreCheckedIdx) {
449  // Need to check this alias.
450  if (GISelAddressing::instMayAlias(CheckStore, *PotentialAliasOp, *MRI,
451  AA)) {
452  LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp
453  << " detected\n");
454  return true;
455  }
456  } else {
457  // Once our store index is lower than the index associated with the
458  // potential alias, we know that we've already checked for this alias
459  // and all of the earlier potential aliases too.
460  return false;
461  }
462  }
463  return false;
464  };
465  // Start from the last store in the group, and check if it aliases with any
466  // of the potential aliasing operations in the list.
467  for (int StoreIdx = C.Stores.size() - 1; StoreIdx >= 0; --StoreIdx) {
468  auto *CheckStore = C.Stores[StoreIdx];
469  if (DoesStoreAliasWithPotential(StoreIdx, *CheckStore))
470  continue;
471  StoresToMerge.emplace_back(CheckStore);
472  }
473 
474  LLVM_DEBUG(dbgs() << StoresToMerge.size()
475  << " stores remaining after alias checks. Merging...\n");
476 
477  // Now we've checked for aliasing hazards, merge any stores left.
478  C.reset();
479  if (StoresToMerge.size() < 2)
480  return false;
481  return mergeStores(StoresToMerge);
482 }
483 
484 bool LoadStoreOpt::operationAliasesWithCandidate(MachineInstr &MI,
485  StoreMergeCandidate &C) {
486  if (C.Stores.empty())
487  return false;
488  return llvm::any_of(C.Stores, [&](MachineInstr *OtherMI) {
489  return instMayAlias(MI, *OtherMI, *MRI, AA);
490  });
491 }
492 
493 void LoadStoreOpt::StoreMergeCandidate::addPotentialAlias(MachineInstr &MI) {
494  PotentialAliases.emplace_back(std::make_pair(&MI, Stores.size() - 1));
495 }
496 
497 bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
498  StoreMergeCandidate &C) {
499  // Check if the given store writes to an adjacent address, and other
500  // requirements.
501  LLT ValueTy = MRI->getType(StoreMI.getValueReg());
502  LLT PtrTy = MRI->getType(StoreMI.getPointerReg());
503 
504  // Only handle scalars.
505  if (!ValueTy.isScalar())
506  return false;
507 
508  // Don't allow truncating stores for now.
509  if (StoreMI.getMemSizeInBits() != ValueTy.getSizeInBits())
510  return false;
511 
512  // Avoid adding volatile or ordered stores to the candidate. We already have a
513  // check for this in instMayAlias() but that only get's called later between
514  // potential aliasing hazards.
515  if (!StoreMI.isSimple())
516  return false;
517 
518  Register StoreAddr = StoreMI.getPointerReg();
519  auto BIO = getPointerInfo(StoreAddr, *MRI);
520  Register StoreBase = BIO.BaseReg;
521  uint64_t StoreOffCst = BIO.Offset;
522  if (C.Stores.empty()) {
523  // This is the first store of the candidate.
524  // If the offset can't possibly allow for a lower addressed store with the
525  // same base, don't bother adding it.
526  if (StoreOffCst < ValueTy.getSizeInBytes())
527  return false;
528  C.BasePtr = StoreBase;
529  C.CurrentLowestOffset = StoreOffCst;
530  C.Stores.emplace_back(&StoreMI);
531  LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: "
532  << StoreMI);
533  return true;
534  }
535 
536  // Check the store is the same size as the existing ones in the candidate.
537  if (MRI->getType(C.Stores[0]->getValueReg()).getSizeInBits() !=
538  ValueTy.getSizeInBits())
539  return false;
540 
541  if (MRI->getType(C.Stores[0]->getPointerReg()).getAddressSpace() !=
542  PtrTy.getAddressSpace())
543  return false;
544 
545  // There are other stores in the candidate. Check that the store address
546  // writes to the next lowest adjacent address.
547  if (C.BasePtr != StoreBase)
548  return false;
549  if ((C.CurrentLowestOffset - ValueTy.getSizeInBytes()) !=
550  static_cast<uint64_t>(StoreOffCst))
551  return false;
552 
553  // This writes to an adjacent address. Allow it.
554  C.Stores.emplace_back(&StoreMI);
555  C.CurrentLowestOffset = C.CurrentLowestOffset - ValueTy.getSizeInBytes();
556  LLVM_DEBUG(dbgs() << "Candidate added store: " << StoreMI);
557  return true;
558 }
559 
560 bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) {
561  bool Changed = false;
562  // Walk through the block bottom-up, looking for merging candidates.
563  StoreMergeCandidate Candidate;
564  for (MachineInstr &MI : llvm::reverse(MBB)) {
565  if (InstsToErase.contains(&MI))
566  continue;
567 
568  if (auto *StoreMI = dyn_cast<GStore>(&MI)) {
569  // We have a G_STORE. Add it to the candidate if it writes to an adjacent
570  // address.
571  if (!addStoreToCandidate(*StoreMI, Candidate)) {
572  // Store wasn't eligible to be added. May need to record it as a
573  // potential alias.
574  if (operationAliasesWithCandidate(*StoreMI, Candidate)) {
575  Changed |= processMergeCandidate(Candidate);
576  continue;
577  }
578  Candidate.addPotentialAlias(*StoreMI);
579  }
580  continue;
581  }
582 
583  // If we don't have any stores yet, this instruction can't pose a problem.
584  if (Candidate.Stores.empty())
585  continue;
586 
587  // We're dealing with some other kind of instruction.
588  if (isInstHardMergeHazard(MI)) {
589  Changed |= processMergeCandidate(Candidate);
590  Candidate.Stores.clear();
591  continue;
592  }
593 
594  if (!MI.mayLoadOrStore())
595  continue;
596 
597  if (operationAliasesWithCandidate(MI, Candidate)) {
598  // We have a potential alias, so process the current candidate if we can
599  // and then continue looking for a new candidate.
600  Changed |= processMergeCandidate(Candidate);
601  continue;
602  }
603 
604  // Record this instruction as a potential alias for future stores that are
605  // added to the candidate.
606  Candidate.addPotentialAlias(MI);
607  }
608 
609  // Process any candidate left after finishing searching the entire block.
610  Changed |= processMergeCandidate(Candidate);
611 
612  // Erase instructions now that we're no longer iterating over the block.
613  for (auto *MI : InstsToErase)
614  MI->eraseFromParent();
615  InstsToErase.clear();
616  return Changed;
617 }
618 
619 bool LoadStoreOpt::mergeFunctionStores(MachineFunction &MF) {
620  bool Changed = false;
621  for (auto &BB : MF) {
622  Changed |= mergeBlockStores(BB);
623  }
624  return Changed;
625 }
626 
627 void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) {
628  // Query the legalizer info to record what store types are legal.
629  // We record this because we don't want to bother trying to merge stores into
630  // illegal ones, which would just result in being split again.
631 
632  if (LegalStoreSizes.count(AddrSpace)) {
633  assert(LegalStoreSizes[AddrSpace].any());
634  return; // Already cached sizes for this address space.
635  }
636 
637  // Need to reserve at least MaxStoreSizeToForm + 1 bits.
638  BitVector LegalSizes(MaxStoreSizeToForm * 2);
639  const auto &LI = *MF->getSubtarget().getLegalizerInfo();
640  const auto &DL = MF->getFunction().getParent()->getDataLayout();
641  Type *IntPtrIRTy =
642  DL.getIntPtrType(MF->getFunction().getContext(), AddrSpace);
643  LLT PtrTy = getLLTForType(*IntPtrIRTy->getPointerTo(AddrSpace), DL);
644  // We assume that we're not going to be generating any stores wider than
645  // MaxStoreSizeToForm bits for now.
646  for (unsigned Size = 2; Size <= MaxStoreSizeToForm; Size *= 2) {
647  LLT Ty = LLT::scalar(Size);
650  SmallVector<LLT> StoreTys({Ty, PtrTy});
651  LegalityQuery Q(TargetOpcode::G_STORE, StoreTys, MemDescrs);
652  LegalizeActionStep ActionStep = LI.getAction(Q);
653  if (ActionStep.Action == LegalizeActions::Legal)
654  LegalSizes.set(Size);
655  }
656  assert(LegalSizes.any() && "Expected some store sizes to be legal!");
657  LegalStoreSizes[AddrSpace] = LegalSizes;
658 }
659 
661  // If the ISel pipeline failed, do not bother running that pass.
662  if (MF.getProperties().hasProperty(
664  return false;
665 
666  LLVM_DEBUG(dbgs() << "Begin memory optimizations for: " << MF.getName()
667  << '\n');
668 
669  init(MF);
670  bool Changed = false;
671  Changed |= mergeFunctionStores(MF);
672 
673  LegalStoreSizes.clear();
674  return Changed;
675 }
llvm::GISelAddressing::BaseIndexOffset
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:36
llvm::Check::Size
@ Size
Definition: FileCheck.h:77
MIPatternMatch.h
llvm::MachineIRBuilder::setMF
void setMF(MachineFunction &MF)
Definition: MachineIRBuilder.cpp:24
llvm::MachineFunctionProperties::hasProperty
bool hasProperty(Property P) const
Definition: MachineFunction.h:192
LowLevelType.h
llvm::getDefIgnoringCopies
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:462
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:108
MachineInstr.h
MathExtras.h
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::MachineIRBuilder::setDebugLoc
void setDebugLoc(const DebugLoc &DL)
Set the debug location to DL for all the next build instructions.
Definition: MachineIRBuilder.h:365
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:387
llvm::DILocation::getMergedLocation
static const DILocation * getMergedLocation(const DILocation *LocA, const DILocation *LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
Definition: DebugInfoMetadata.cpp:107
AtomicOrdering.h
llvm::MIPatternMatch::m_Reg
operand_type_match m_Reg()
Definition: MIPatternMatch.h:268
DebugInfoMetadata.h
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
llvm::TypeSize::getFixedSize
ScalarTy getFixedSize() const
Definition: TypeSize.h:444
llvm::LegalizeActionStep::Action
LegalizeAction Action
The action to take or the final answer.
Definition: LegalizerInfo.h:144
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoadStoreOpt.cpp:40
llvm::LegalizeActionStep
The result of a query.
Definition: LegalizerInfo.h:142
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
Statistic.h
llvm::MachineFunction::getMachineMemOperand
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Definition: MachineFunction.cpp:454
ErrorHandling.h
llvm::GStore::getValueReg
Register getValueReg() const
Get the stored value register.
Definition: GenericMachineInstrs.h:133
llvm::AAResults::isNoAlias
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are no-alias.
Definition: AliasAnalysis.h:348
llvm::MachineOptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: MachineOptimizationRemarkEmitter.h:151
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
OptimizationRemarkEmitter.h
MachineBasicBlock.h
llvm::GISelAddressing::getPointerInfo
BaseIndexOffset getPointerInfo(Register Ptr, MachineRegisterInfo &MRI)
Returns a BaseIndexOffset which describes the pointer in Ptr.
Definition: LoadStoreOpt.cpp:81
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:321
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::GLoadStore::getMemSizeInBits
uint64_t getMemSizeInBits() const
Returns the size in bits of the memory access.
Definition: GenericMachineInstrs.h:63
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
GenericMachineInstrs.h
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::getSelectionDAGFallbackAnalysisUsage
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition: Utils.cpp:895
llvm::MachineIRBuilder::setInstr
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
Definition: MachineIRBuilder.h:342
LegalizerInfo.h
llvm::TargetLoweringBase::canMergeStoresTo
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Returns if it's reasonable to merge stores to MemVT size.
Definition: TargetLowering.h:621
llvm::MemoryLocation::getSizeOrUnknown
static uint64_t getSizeOrUnknown(const TypeSize &T)
Definition: MemoryLocation.h:285
llvm::LegalityPredicates::any
Predicate any(Predicate P0, Predicate P1)
True iff P0 or P1 are true.
Definition: LegalizerInfo.h:241
llvm::MIPatternMatch::m_GPtrAdd
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
Definition: MIPatternMatch.h:468
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:167
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::MachineIRBuilder::buildConstant
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Definition: MachineIRBuilder.cpp:293
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:265
MachineRegisterInfo.h
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations", false, false) INITIALIZE_PASS_END(LoadStoreOpt
AliasAnalysis.h
llvm::getLLTForType
LLT getLLTForType(Type &Ty, const DataLayout &DL)
Construct a low-level type based on an LLVM type.
Definition: LowLevelType.cpp:20
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::MachineOptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: MachineOptimizationRemarkEmitter.h:57
TargetLowering.h
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:667
llvm::PowerOf2Floor
uint64_t PowerOf2Floor(uint64_t A)
Returns the power of two which is less than or equal to the given value.
Definition: MathExtras.h:623
llvm::AAResults
Definition: AliasAnalysis.h:294
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::LLT::getSizeInBits
TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelTypeImpl.h:152
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
Utils.h
llvm::MachineFunction::getProperties
const MachineFunctionProperties & getProperties() const
Get the function properties.
Definition: MachineFunction.h:748
false
Definition: StackSlotColoring.cpp:141
llvm::GLoadStore::isSimple
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
Definition: GenericMachineInstrs.h:54
TargetOpcodes.h
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::LoadStoreOpt
Definition: LoadStoreOpt.h:62
llvm::BitVector
Definition: BitVector.h:75
llvm::MachineFrameInfo::isFixedObjectIndex
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
Definition: MachineFrameInfo.h:688
llvm::GLoadStore::getPointerReg
Register getPointerReg() const
Get the source register of the pointer value.
Definition: GenericMachineInstrs.h:44
llvm::LegalizeActions::Legal
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:46
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::MachineInstrBuilder::getReg
Register getReg(unsigned Idx) const
Get the register for the operand index.
Definition: MachineInstrBuilder.h:94
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
MachineOptimizationRemarkEmitter.h
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
llvm::GISelAddressing::BaseIndexOffset::BaseReg
Register BaseReg
Definition: LoadStoreOpt.h:37
llvm::LLT::getAddressSpace
unsigned getAddressSpace() const
Definition: LowLevelTypeImpl.h:238
llvm::GStore
Represents a G_STORE.
Definition: GenericMachineInstrs.h:130
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:657
llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:445
optimizations
Generic memory optimizations
Definition: LoadStoreOpt.cpp:53
llvm::IRSimilarity::Legal
@ Legal
Definition: IRSimilarityIdentifier.h:77
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
uint64_t
llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition: TargetLowering.h:939
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:652
const
aarch64 promote const
Definition: AArch64PromoteConstant.cpp:232
LoadStoreOpt.h
MemoryLocation.h
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition: MachineFunction.cpp:567
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
isInstHardMergeHazard
static bool isInstHardMergeHazard(MachineInstr &MI)
Returns true if the instruction creates an unavoidable hazard that forces a boundary between store me...
Definition: LoadStoreOpt.cpp:285
llvm::LegalizerInfo::getAction
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
Definition: LegalizerInfo.cpp:319
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:60
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:82
llvm::LLT::isScalar
bool isScalar() const
Definition: LowLevelTypeImpl.h:118
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::LegalityQuery
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Definition: LegalizerInfo.h:108
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1741
llvm::getIConstantVRegValWithLookThrough
Optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:410
llvm::LoadStoreOpt::ID
static char ID
Definition: LoadStoreOpt.h:64
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:313
llvm::getApproximateEVTForLLT
EVT getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, LLVMContext &Ctx)
Definition: LowLevelType.cpp:54
llvm::GISelAddressing::instMayAlias
bool instMayAlias(const MachineInstr &MI, const MachineInstr &Other, MachineRegisterInfo &MRI, AliasAnalysis *AA)
Returns true if the instruction MI may alias Other.
Definition: LoadStoreOpt.cpp:188
MORE
#define MORE()
Definition: regcomp.c:252
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::LoadStoreOpt::LoadStoreOpt
LoadStoreOpt()
Definition: LoadStoreOpt.cpp:59
llvm::LoadStoreOpt::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Definition: LoadStoreOpt.cpp:660
llvm::TargetSubtargetInfo::getLegalizerInfo
virtual const LegalizerInfo * getLegalizerInfo() const
Definition: TargetSubtargetInfo.h:123
llvm::MachineFunctionProperties::Property::FailedISel
@ FailedISel
std
Definition: BitVector.h:851
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:623
llvm::MIPatternMatch::m_ICst
ConstantMatch< APInt > m_ICst(APInt &Cst)
Definition: MIPatternMatch.h:92
llvm::AnalysisUsage::setPreservesAll
void setPreservesAll()
Set by analyses that do not transform their input at all.
Definition: PassAnalysisSupport.h:130
MachineFrameInfo.h
llvm::GISelAddressing::BaseIndexOffset::Offset
int64_t Offset
Definition: LoadStoreOpt.h:39
llvm::LLT::getSizeInBytes
TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelTypeImpl.h:162
llvm::AArch64CC::LS
@ LS
Definition: AArch64BaseInfo.h:264
Casting.h
llvm::Type::getPointerTo
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:774
llvm::GISelAddressing::aliasIsKnownForLoadStore
bool aliasIsKnownForLoadStore(const MachineInstr &MI1, const MachineInstr &MI2, bool &IsAlias, MachineRegisterInfo &MRI)
Compute whether or not a memory access at MI1 aliases with an access at MI2.
Definition: LoadStoreOpt.cpp:103
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:614
MaxStoreSizeToForm
const unsigned MaxStoreSizeToForm
Definition: LoadStoreOpt.cpp:48
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:106
llvm::MemoryLocation::UnknownSize
@ UnknownSize
Definition: MemoryLocation.h:216
llvm::TargetSubtargetInfo::getTargetLowering
virtual const TargetLowering * getTargetLowering() const
Definition: TargetSubtargetInfo.h:99
llvm::MachineRegisterInfo::getType
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Definition: MachineRegisterInfo.h:745
llvm::Register::isValid
bool isValid() const
Definition: Register.h:126
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:924
llvm::GLoadStore::getMMO
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
Definition: GenericMachineInstrs.h:47
llvm::LoadStoreOpt::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: LoadStoreOpt.cpp:74
llvm::LegalizeActions::Unsupported
@ Unsupported
This operation is completely unsupported on the target.
Definition: LegalizerInfo.h:90
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:485
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:399
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
Register.h
llvm::MIPatternMatch::mi_match
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Definition: MIPatternMatch.h:25
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::MachineIRBuilder::buildStore
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
Definition: MachineIRBuilder.cpp:425
MachineFunction.h
llvm::LLT::scalar
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelTypeImpl.h:42
InitializePasses.h
Debug.h
llvm::AtomicOrdering::NotAtomic
@ NotAtomic
llvm::MemoryLocation
Representation for a specific memory location.
Definition: MemoryLocation.h:211
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1251
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
llvm::MachineFunctionProperties::Property::Legalized
@ Legalized
llvm::LLT
Definition: LowLevelTypeImpl.h:39