//===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <limits>

using namespace llvm;

#define DEBUG_TYPE "aarch64-ldst-opt"

STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
          "Number of load/store pairs generated from unscaled operations");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");

DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
              "Controls which pairs are considered for renaming");

// The LdStLimit limits how far we search for load/store pairs.
static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
                                   cl::init(20), cl::Hidden);

// The UpdateLimit limits how far we search for update instructions when we
// form pre-/post-index instructions.
static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
                                     cl::Hidden);

// Enable register renaming to find additional store pairing opportunities.
static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
                                    cl::init(true), cl::Hidden);

#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"

namespace {

using LdStPairFlags = struct LdStPairFlags {
  // If a matching instruction is found, MergeForward is set to true if the
  // merge is to remove the first instruction and replace the second with
  // a pair-wise insn, and false if the reverse is true.
  bool MergeForward = false;

  // SExtIdx gives the index of the result of the load pair that must be
  // extended. The value of SExtIdx assumes that the paired load produces the
  // values in this order: (I, returned iterator), i.e., -1 means no value has
  // to be extended, 0 means I, and 1 means the returned iterator.
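  // For example, if I is an LDRSWui and the matching instruction is an
  // LDRWui, the pair is emitted with the non-extending opcode (LDPWi) and
  // SExtIdx selects the result that is then sign-extended with an SBFMXri
  // after the merge.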
  int SExtIdx = -1;

  // If not none, RenameReg can be used to rename the result register of the
  // first store in a pair. Currently this only works when merging stores
  // forward.
  Optional<MCPhysReg> RenameReg = None;

  LdStPairFlags() = default;

  void setMergeForward(bool V = true) { MergeForward = V; }
  bool getMergeForward() const { return MergeForward; }

  void setSExtIdx(int V) { SExtIdx = V; }
  int getSExtIdx() const { return SExtIdx; }

  void setRenameReg(MCPhysReg R) { RenameReg = R; }
  void clearRenameReg() { RenameReg = None; }
  Optional<MCPhysReg> getRenameReg() const { return RenameReg; }
};

struct AArch64LoadStoreOpt : public MachineFunctionPass {
  static char ID;

  AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
    initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
  }

  AliasAnalysis *AA;
  const AArch64InstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const AArch64Subtarget *Subtarget;

  // Track which register units have been modified and used.
  LiveRegUnits ModifiedRegUnits, UsedRegUnits;
  LiveRegUnits DefinedInBB;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  // Scan the instructions looking for a load/store that can be combined
  // with the current instruction into a load/store pair.
  // Return the matching instruction if one is found, else MBB->end().
  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
                                               LdStPairFlags &Flags,
                                               unsigned Limit,
                                               bool FindNarrowMerge);

  // Scan the instructions looking for a store that writes to the address from
  // which the current load instruction reads. Return true if one is found.
  bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
                         MachineBasicBlock::iterator &StoreI);

  // Merge the two narrow stores indicated into a single wider store.
  MachineBasicBlock::iterator
  mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                        MachineBasicBlock::iterator MergeMI,
                        const LdStPairFlags &Flags);

  // Merge the two instructions indicated into a single pair-wise instruction.
  MachineBasicBlock::iterator
  mergePairedInsns(MachineBasicBlock::iterator I,
                   MachineBasicBlock::iterator Paired,
                   const LdStPairFlags &Flags);

  // Promote the load that reads directly from the address stored to.
  MachineBasicBlock::iterator
  promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                       MachineBasicBlock::iterator StoreI);

  // Scan the instruction list to find a base register update that can
  // be combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback. Scan forwards.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
                                int UnscaledOffset, unsigned Limit);

  // Scan the instruction list to find a base register update that can
  // be combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback. Scan backwards.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);

  // Find an instruction that updates the base register of the ld/st
  // instruction.
  bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
                            unsigned BaseReg, int Offset);

  // Merge a pre- or post-index base register update into a ld/st instruction.
  MachineBasicBlock::iterator
  mergeUpdateInsn(MachineBasicBlock::iterator I,
                  MachineBasicBlock::iterator Update, bool IsPreIdx);

  // Find and merge zero store instructions.
  bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);

  // Find and pair ldr/str instructions.
  bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);

  // Find and promote load instructions which read directly from store.
  bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);

  // Find and merge base register updates before or after a ld/st instruction.
  bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);

  bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);

  bool runOnMachineFunction(MachineFunction &Fn) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
};

char AArch64LoadStoreOpt::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
                AARCH64_LOAD_STORE_OPT_NAME, false, false)

static bool isNarrowStore(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
    return true;
  }
}

// These instructions set a memory tag and either keep the memory contents
// unchanged or set them to zero, ignoring the address part of the source
// register.
static bool isTagStore(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::STGOffset:
  case AArch64::STZGOffset:
  case AArch64::ST2GOffset:
  case AArch64::STZ2GOffset:
    return true;
  }
}

static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                         bool *IsValidLdStrOpc = nullptr) {
  if (IsValidLdStrOpc)
    *IsValidLdStrOpc = true;
  switch (Opc) {
  default:
    if (IsValidLdStrOpc)
      *IsValidLdStrOpc = false;
    return std::numeric_limits<unsigned>::max();
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
  case AArch64::STRWui:
  case AArch64::STURWi:
  case AArch64::STRXui:
  case AArch64::STURXi:
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return Opc;
  case AArch64::LDRSWui:
    return AArch64::LDRWui;
  case AArch64::LDURSWi:
    return AArch64::LDURWi;
  }
}

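// Maps a narrow store to the store of twice the width, e.g. STRBBui to
// STRHHui and STRHHui to STRWui. This is how two adjacent zero stores such as
//   strh wzr, [x0]
//   strh wzr, [x0, #2]
// end up as a single "str wzr, [x0]".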
static unsigned getMatchingWideOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no wide equivalent!");
  case AArch64::STRBBui:
    return AArch64::STRHHui;
  case AArch64::STRHHui:
    return AArch64::STRWui;
  case AArch64::STURBBi:
    return AArch64::STURHHi;
  case AArch64::STURHHi:
    return AArch64::STURWi;
  case AArch64::STURWi:
    return AArch64::STURXi;
  case AArch64::STRWui:
    return AArch64::STRXui;
  }
}

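// Maps a load/store to its paired equivalent, e.g. LDRXui/LDURXi to LDPXi,
// so that two adjacent loads
//   ldr x0, [x2]
//   ldr x1, [x2, #8]
// can be combined into
//   ldp x0, x1, [x2]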
static unsigned getMatchingPairOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pairwise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STPSi;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STPDi;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STPQi;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STPWi;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STPXi;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDPSi;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDPDi;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDPQi;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDPWi;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDPXi;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return AArch64::LDPSWi;
  }
}

static bool isMatchingStore(MachineInstr &LoadInst,
                            MachineInstr &StoreInst) {
  unsigned LdOpc = LoadInst.getOpcode();
  unsigned StOpc = StoreInst.getOpcode();
  switch (LdOpc) {
  default:
    llvm_unreachable("Unsupported load instruction!");
  case AArch64::LDRBBui:
    return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
           StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURBBi:
    return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
           StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRHHui:
    return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
           StOpc == AArch64::STRXui;
  case AArch64::LDURHHi:
    return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
           StOpc == AArch64::STURXi;
  case AArch64::LDRWui:
    return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURWi:
    return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRXui:
    return StOpc == AArch64::STRXui;
  case AArch64::LDURXi:
    return StOpc == AArch64::STURXi;
  }
}

static unsigned getPreIndexedOpcode(unsigned Opc) {
  // FIXME: We don't currently support creating pre-indexed loads/stores when
  // the load or store is the unscaled version. If we decide to perform such an
  // optimization in the future the cases for the unscaled loads/stores will
  // need to be added here.
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pre-indexed equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpre;
  case AArch64::STRDui:
    return AArch64::STRDpre;
  case AArch64::STRQui:
    return AArch64::STRQpre;
  case AArch64::STRBBui:
    return AArch64::STRBBpre;
  case AArch64::STRHHui:
    return AArch64::STRHHpre;
  case AArch64::STRWui:
    return AArch64::STRWpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  case AArch64::LDRSui:
    return AArch64::LDRSpre;
  case AArch64::LDRDui:
    return AArch64::LDRDpre;
  case AArch64::LDRQui:
    return AArch64::LDRQpre;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpre;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpre;
  case AArch64::LDRWui:
    return AArch64::LDRWpre;
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpre;
  case AArch64::LDPSi:
    return AArch64::LDPSpre;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpre;
  case AArch64::LDPDi:
    return AArch64::LDPDpre;
  case AArch64::LDPQi:
    return AArch64::LDPQpre;
  case AArch64::LDPWi:
    return AArch64::LDPWpre;
  case AArch64::LDPXi:
    return AArch64::LDPXpre;
  case AArch64::STPSi:
    return AArch64::STPSpre;
  case AArch64::STPDi:
    return AArch64::STPDpre;
  case AArch64::STPQi:
    return AArch64::STPQpre;
  case AArch64::STPWi:
    return AArch64::STPWpre;
  case AArch64::STPXi:
    return AArch64::STPXpre;
  case AArch64::STGOffset:
    return AArch64::STGPreIndex;
  case AArch64::STZGOffset:
    return AArch64::STZGPreIndex;
  case AArch64::ST2GOffset:
    return AArch64::ST2GPreIndex;
  case AArch64::STZ2GOffset:
    return AArch64::STZ2GPreIndex;
  case AArch64::STGPi:
    return AArch64::STGPpre;
  }
}

static unsigned getPostIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no post-indexed equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STRSpost;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STRDpost;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STRQpost;
  case AArch64::STRBBui:
    return AArch64::STRBBpost;
  case AArch64::STRHHui:
    return AArch64::STRHHpost;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STRWpost;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STRXpost;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDRSpost;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDRDpost;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDRQpost;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpost;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpost;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDRWpost;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDRXpost;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpost;
  case AArch64::LDPSi:
    return AArch64::LDPSpost;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpost;
  case AArch64::LDPDi:
    return AArch64::LDPDpost;
  case AArch64::LDPQi:
    return AArch64::LDPQpost;
  case AArch64::LDPWi:
    return AArch64::LDPWpost;
  case AArch64::LDPXi:
    return AArch64::LDPXpost;
  case AArch64::STPSi:
    return AArch64::STPSpost;
  case AArch64::STPDi:
    return AArch64::STPDpost;
  case AArch64::STPQi:
    return AArch64::STPQpost;
  case AArch64::STPWi:
    return AArch64::STPWpost;
  case AArch64::STPXi:
    return AArch64::STPXpost;
  case AArch64::STGOffset:
    return AArch64::STGPostIndex;
  case AArch64::STZGOffset:
    return AArch64::STZGPostIndex;
  case AArch64::ST2GOffset:
    return AArch64::ST2GPostIndex;
  case AArch64::STZ2GOffset:
    return AArch64::STZ2GPostIndex;
  case AArch64::STGPi:
    return AArch64::STGPpost;
  }
}

static bool isPairedLdSt(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:
  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
  case AArch64::STPWi:
  case AArch64::STPXi:
  case AArch64::STGPi:
    return true;
  }
}

// Returns the scale and offset range of pre/post indexed variants of MI.
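// For example, for STPXi the scale is 8 and the immediate range is [-64, 63],
// i.e. byte offsets -512..504 in steps of 8, while a plain STRXui gets the
// unscaled writeback range of [-256, 255] bytes.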
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
                                       int &MinOffset, int &MaxOffset) {
  bool IsPaired = isPairedLdSt(MI);
  bool IsTagStore = isTagStore(MI);
  // ST*G and all paired ldst have the same scale in pre/post-indexed variants
  // as in the "unsigned offset" variant.
  // All other pre/post indexed ldst instructions are unscaled.
  Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;

  if (IsPaired) {
    MinOffset = -64;
    MaxOffset = 63;
  } else {
    MinOffset = -256;
    MaxOffset = 255;
  }
}

static MachineOperand &getLdStRegOp(MachineInstr &MI,
                                    unsigned PairedRegOp = 0) {
  assert(PairedRegOp < 2 && "Unexpected register operand idx.");
  unsigned Idx = isPairedLdSt(MI) ? PairedRegOp : 0;
  return MI.getOperand(Idx);
}

static const MachineOperand &getLdStBaseOp(const MachineInstr &MI) {
  unsigned Idx = isPairedLdSt(MI) ? 2 : 1;
  return MI.getOperand(Idx);
}

static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI) {
  unsigned Idx = isPairedLdSt(MI) ? 3 : 2;
  return MI.getOperand(Idx);
}

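// Returns true if the load's accessed bytes are fully contained within the
// store's, e.g. "str x1, [x0, #8]" writes bytes 8-15, so a following
// "ldrh w2, [x0, #12]" (bytes 12-13) is in range of the store.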
static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
                                  MachineInstr &StoreInst,
                                  const AArch64InstrInfo *TII) {
  assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
  int LoadSize = TII->getMemScale(LoadInst);
  int StoreSize = TII->getMemScale(StoreInst);
  int UnscaledStOffset = TII->isUnscaledLdSt(StoreInst)
                             ? getLdStOffsetOp(StoreInst).getImm()
                             : getLdStOffsetOp(StoreInst).getImm() * StoreSize;
  int UnscaledLdOffset = TII->isUnscaledLdSt(LoadInst)
                             ? getLdStOffsetOp(LoadInst).getImm()
                             : getLdStOffsetOp(LoadInst).getImm() * LoadSize;
  return (UnscaledStOffset <= UnscaledLdOffset) &&
         (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}

static bool isPromotableZeroStoreInst(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
          isNarrowStore(Opc)) &&
         getLdStRegOp(MI).getReg() == AArch64::WZR;
}

static bool isPromotableLoadFromStore(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  // Unscaled instructions.
  case AArch64::LDURBBi:
  case AArch64::LDURHHi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
    return true;
  }
}

static bool isMergeableLdStUpdate(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::STRHHui:
  case AArch64::STRBBui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
  case AArch64::LDRHHui:
  case AArch64::LDRBBui:
  case AArch64::STGOffset:
  case AArch64::STZGOffset:
  case AArch64::ST2GOffset:
  case AArch64::STZ2GOffset:
  case AArch64::STGPi:
  // Unscaled instructions.
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  // Paired instructions.
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:
  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
  case AArch64::STPWi:
  case AArch64::STPXi:
    // Make sure this is a reg+imm (as opposed to an address reloc).
    if (!getLdStOffsetOp(MI).isImm())
      return false;

    return true;
  }
}

MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                                           MachineBasicBlock::iterator MergeMI,
                                           const LdStPairFlags &Flags) {
  assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
         "Expected promotable zero stores.");

  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further. Either way, the merge will invalidate the iterator,
  // and we don't need to scan the new instruction, as it's a pairwise
  // instruction, which we're not considering for further action anyway.
  if (NextI == MergeMI)
    NextI = next_nodbg(NextI, E);

  unsigned Opc = I->getOpcode();
  bool IsScaled = !TII->isUnscaledLdSt(Opc);
  int OffsetStride = IsScaled ? 1 : TII->getMemScale(*I);

  bool MergeForward = Flags.getMergeForward();
  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
  // MergeForward also determines from which instruction we copy the base
  // register operand, so that the flags stay compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? getLdStBaseOp(*MergeMI) : getLdStBaseOp(*I);

  // Which register is Rt and which is Rt2 depends on the offset order.
  MachineInstr *RtMI;
  if (getLdStOffsetOp(*I).getImm() ==
      getLdStOffsetOp(*MergeMI).getImm() + OffsetStride)
    RtMI = &*MergeMI;
  else
    RtMI = &*I;

  int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
  // Change the scaled offset from small to large type.
  if (IsScaled) {
    assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
    OffsetImm /= 2;
  }

  // Construct the new instruction.
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineInstrBuilder MIB;
  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
            .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
            .add(BaseRegOp)
            .addImm(OffsetImm)
            .cloneMergedMemRefs({&*I, &*MergeMI})
            .setMIFlags(I->mergeFlagsWith(*MergeMI));
  (void)MIB;

  LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n    ");
  LLVM_DEBUG(I->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(MergeMI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "  with instruction:\n    ");
  LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
  LLVM_DEBUG(dbgs() << "\n");

  // Erase the old instructions.
  I->eraseFromParent();
  MergeMI->eraseFromParent();
  return NextI;
}

// Apply Fn to all instructions between MI and the beginning of the block, until
// a def for DefReg is reached. Returns true iff Fn returns true for all
// visited instructions. Stops after visiting Limit iterations.
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
                              const TargetRegisterInfo *TRI, unsigned Limit,
                              std::function<bool(MachineInstr &, bool)> &Fn) {
  auto MBB = MI.getParent();
  for (MachineInstr &I :
       instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
    if (!Limit)
      return false;
    --Limit;

    bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
      return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
             TRI->regsOverlap(MOP.getReg(), DefReg);
    });
    if (!Fn(I, isDef))
      return false;
    if (isDef)
      break;
  }
  return true;
}

static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
                                   const TargetRegisterInfo *TRI) {

  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && MOP.isKill())
      Units.removeReg(MOP.getReg());

  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && !MOP.isKill())
      Units.addReg(MOP.getReg());
}

MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator Paired,
                                      const LdStPairFlags &Flags) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further. Either way, the merge will invalidate the iterator,
  // and we don't need to scan the new instruction, as it's a pairwise
  // instruction, which we're not considering for further action anyway.
  if (NextI == Paired)
    NextI = next_nodbg(NextI, E);

  int SExtIdx = Flags.getSExtIdx();
  unsigned Opc =
      SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
  bool IsUnscaled = TII->isUnscaledLdSt(Opc);
  int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;

  bool MergeForward = Flags.getMergeForward();

  Optional<MCPhysReg> RenameReg = Flags.getRenameReg();
  if (MergeForward && RenameReg) {
    MCRegister RegToRename = getLdStRegOp(*I).getReg();
    DefinedInBB.addReg(*RenameReg);

    // Return the sub/super register for RenameReg, matching the size of
    // OriginalReg.
    auto GetMatchingSubReg = [this,
                              RenameReg](MCPhysReg OriginalReg) -> MCPhysReg {
      for (MCPhysReg SubOrSuper : TRI->sub_and_superregs_inclusive(*RenameReg))
        if (TRI->getMinimalPhysRegClass(OriginalReg) ==
            TRI->getMinimalPhysRegClass(SubOrSuper))
          return SubOrSuper;
      llvm_unreachable("Should have found matching sub or super register!");
    };

    std::function<bool(MachineInstr &, bool)> UpdateMIs =
        [this, RegToRename, GetMatchingSubReg](MachineInstr &MI, bool IsDef) {
          if (IsDef) {
            bool SeenDef = false;
            for (auto &MOP : MI.operands()) {
              // Rename the first explicit definition and all implicit
              // definitions matching RegToRename.
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  (!SeenDef || (MOP.isDef() && MOP.isImplicit())) &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                MOP.setReg(GetMatchingSubReg(MOP.getReg()));
                SeenDef = true;
              }
            }
          } else {
            for (auto &MOP : MI.operands()) {
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                MOP.setReg(GetMatchingSubReg(MOP.getReg()));
              }
            }
          }
          LLVM_DEBUG(dbgs() << "Renamed " << MI << "\n");
          return true;
        };
    forAllMIsUntilDef(*I, RegToRename, TRI, LdStLimit, UpdateMIs);

#if !defined(NDEBUG)
    // Make sure the register used for renaming is not used between the paired
    // instructions. That would trash the content before the new paired
    // instruction.
    for (auto &MI :
         iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>(
             std::next(I), std::next(Paired)))
      assert(all_of(MI.operands(),
                    [this, &RenameReg](const MachineOperand &MOP) {
                      return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
                             !TRI->regsOverlap(MOP.getReg(), *RenameReg);
                    }) &&
             "Rename register used between paired instruction, trashing the "
             "content");
#endif
  }

  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
  // MergeForward also determines from which instruction we copy the base
  // register operand, so that the flags stay compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? getLdStBaseOp(*Paired) : getLdStBaseOp(*I);

  int Offset = getLdStOffsetOp(*I).getImm();
  int PairedOffset = getLdStOffsetOp(*Paired).getImm();
  bool PairedIsUnscaled = TII->isUnscaledLdSt(Paired->getOpcode());
  if (IsUnscaled != PairedIsUnscaled) {
    // We're trying to pair instructions that differ in how they are scaled. If
    // I is scaled then scale the offset of Paired accordingly. Otherwise, do
    // the opposite (i.e., make Paired's offset unscaled).
    int MemSize = TII->getMemScale(*Paired);
    if (PairedIsUnscaled) {
      // If the unscaled offset isn't a multiple of the MemSize, we can't
      // pair the operations together.
      assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
             "Offset should be a multiple of the stride!");
      PairedOffset /= MemSize;
    } else {
      PairedOffset *= MemSize;
    }
  }

  // Which register is Rt and which is Rt2 depends on the offset order.
  MachineInstr *RtMI, *Rt2MI;
  if (Offset == PairedOffset + OffsetStride) {
    RtMI = &*Paired;
    Rt2MI = &*I;
    // Here we swapped the assumption made for SExtIdx.
    // I.e., we turn ldp I, Paired into ldp Paired, I.
    // Update the index accordingly.
    if (SExtIdx != -1)
      SExtIdx = (SExtIdx + 1) % 2;
  } else {
    RtMI = &*I;
    Rt2MI = &*Paired;
  }
  int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
  // Scale the immediate offset, if necessary.
  if (TII->isUnscaledLdSt(RtMI->getOpcode())) {
    assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
           "Unscaled offset cannot be scaled.");
    OffsetImm /= TII->getMemScale(*RtMI);
  }

  // Construct the new instruction.
  MachineInstrBuilder MIB;
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineOperand RegOp0 = getLdStRegOp(*RtMI);
  MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
  // Kill flags may become invalid when moving stores for pairing.
  if (RegOp0.isUse()) {
    if (!MergeForward) {
      // Clear kill flags on store if moving upwards. Example:
      //   STRWui %w0, ...
      //   USE %w1
      //   STRWui kill %w1  ; need to clear kill flag when moving STRWui upwards
      RegOp0.setIsKill(false);
      RegOp1.setIsKill(false);
    } else {
      // Clear kill flags of the first store's register. Example:
      //   STRWui %w1, ...
      //   USE kill %w1   ; need to clear kill flag when moving STRWui downwards
      //   STRW %w0
      Register Reg = getLdStRegOp(*I).getReg();
      for (MachineInstr &MI : make_range(std::next(I), Paired))
        MI.clearRegisterKills(Reg, TRI);
    }
  }
  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc)))
            .add(RegOp0)
            .add(RegOp1)
            .add(BaseRegOp)
            .addImm(OffsetImm)
            .cloneMergedMemRefs({&*I, &*Paired})
            .setMIFlags(I->mergeFlagsWith(*Paired));

  (void)MIB;

  LLVM_DEBUG(
      dbgs() << "Creating pair load/store. Replacing instructions:\n    ");
  LLVM_DEBUG(I->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(Paired->print(dbgs()));
  LLVM_DEBUG(dbgs() << "  with instruction:\n    ");
  if (SExtIdx != -1) {
    // Generate the sign extension for the proper result of the ldp.
    // I.e., with X1, that would be:
    // %w1 = KILL %w1, implicit-def %x1
    // %x1 = SBFMXri killed %x1, 0, 31
    MachineOperand &DstMO = MIB->getOperand(SExtIdx);
    // Right now, DstMO has the extended register, since it comes from an
    // extended opcode.
    Register DstRegX = DstMO.getReg();
    // Get the W variant of that register.
    Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
    // Update the result of LDP to use the W instead of the X variant.
    DstMO.setReg(DstRegW);
    LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
    LLVM_DEBUG(dbgs() << "\n");
    // Make the machine verifier happy by providing a definition for
    // the X register.
    // Insert this definition right after the generated LDP, i.e., before
    // InsertionPoint.
    MachineInstrBuilder MIBKill =
        BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
            .addReg(DstRegW)
            .addReg(DstRegX, RegState::Define);
    MIBKill->getOperand(2).setImplicit();
    // Create the sign extension.
    MachineInstrBuilder MIBSXTW =
        BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
            .addReg(DstRegX)
            .addImm(0)
            .addImm(31);
    (void)MIBSXTW;
    LLVM_DEBUG(dbgs() << "  Extend operand:\n    ");
    LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
  } else {
    LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
  }
  LLVM_DEBUG(dbgs() << "\n");

  if (MergeForward)
    for (const MachineOperand &MOP : phys_regs_and_masks(*I))
      if (MOP.isReg() && MOP.isKill())
        DefinedInBB.addReg(MOP.getReg());

  // Erase the old instructions.
  I->eraseFromParent();
  Paired->eraseFromParent();

  return NextI;
}

MachineBasicBlock::iterator
AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                                          MachineBasicBlock::iterator StoreI) {
  MachineBasicBlock::iterator NextI =
      next_nodbg(LoadI, LoadI->getParent()->end());

  int LoadSize = TII->getMemScale(*LoadI);
  int StoreSize = TII->getMemScale(*StoreI);
  Register LdRt = getLdStRegOp(*LoadI).getReg();
  const MachineOperand &StMO = getLdStRegOp(*StoreI);
  Register StRt = getLdStRegOp(*StoreI).getReg();
  bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);

  assert((IsStoreXReg ||
          TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
         "Unexpected RegClass");

  MachineInstr *BitExtMI;
  if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
    // Remove the load if its destination register is the same register as the
    // stored value.
    if (StRt == LdRt && LoadSize == 8) {
      for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                         LoadI->getIterator())) {
        if (MI.killsRegister(StRt, TRI)) {
          MI.clearRegisterKills(StRt, TRI);
          break;
        }
      }
      LLVM_DEBUG(dbgs() << "Remove load instruction:\n    ");
      LLVM_DEBUG(LoadI->print(dbgs()));
      LLVM_DEBUG(dbgs() << "\n");
      LoadI->eraseFromParent();
      return NextI;
    }
    // Replace the load with a mov if the load and store have the same size.
    BitExtMI =
        BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
            .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
            .add(StMO)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .setMIFlags(LoadI->getFlags());
  } else {
    // FIXME: Currently we disable this transformation in big-endian targets as
    // performance and correctness are verified only in little-endian.
    if (!Subtarget->isLittleEndian())
      return NextI;
    bool IsUnscaled = TII->isUnscaledLdSt(*LoadI);
    assert(IsUnscaled == TII->isUnscaledLdSt(*StoreI) &&
           "Unsupported ld/st match");
    assert(LoadSize <= StoreSize && "Invalid load size");
    int UnscaledLdOffset = IsUnscaled
                               ? getLdStOffsetOp(*LoadI).getImm()
                               : getLdStOffsetOp(*LoadI).getImm() * LoadSize;
    int UnscaledStOffset = IsUnscaled
                               ? getLdStOffsetOp(*StoreI).getImm()
                               : getLdStOffsetOp(*StoreI).getImm() * StoreSize;
    int Width = LoadSize * 8;
    unsigned DestReg =
        IsStoreXReg ? Register(TRI->getMatchingSuperReg(
                          LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
                    : LdRt;

    assert((UnscaledLdOffset >= UnscaledStOffset &&
            (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
           "Invalid offset");

    int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
    int Imms = Immr + Width - 1;
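    // For example, forwarding from "str x1, [x0]" to "ldrb w2, [x0, #1]"
    // gives UnscaledLdOffset - UnscaledStOffset == 1 and Width == 8, so
    // Immr == 8 and Imms == 15: a UBFM extracting bits 15:8, the second
    // byte of the stored value in little-endian order.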
    if (UnscaledLdOffset == UnscaledStOffset) {
      uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
                                | ((Immr) << 6)               // immr
                                | ((Imms) << 0)               // imms
          ;

      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
                  DestReg)
              .add(StMO)
              .addImm(AndMaskEncoded)
              .setMIFlags(LoadI->getFlags());
    } else {
      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
                  DestReg)
              .add(StMO)
              .addImm(Immr)
              .addImm(Imms)
              .setMIFlags(LoadI->getFlags());
    }
  }

  // Clear kill flags between store and load.
  for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                     BitExtMI->getIterator()))
    if (MI.killsRegister(StRt, TRI)) {
      MI.clearRegisterKills(StRt, TRI);
      break;
    }

  LLVM_DEBUG(dbgs() << "Promoting load by replacing :\n    ");
  LLVM_DEBUG(StoreI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(LoadI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "  with instructions:\n    ");
  LLVM_DEBUG(StoreI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG((BitExtMI)->print(dbgs()));
  LLVM_DEBUG(dbgs() << "\n");

  // Erase the old instructions.
  LoadI->eraseFromParent();
  return NextI;
}

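// For example, with an 8-byte stride an unscaled byte offset of 256 maps to
// element offset 32, which fits the 7-bit signed immediate of the paired
// form, whereas a byte offset of 260 is rejected outright because it is not
// a multiple of the stride.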
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  if (IsUnscaled) {
    // If the byte-offset isn't a multiple of the stride, there's no point
    // trying to match it.
    if (Offset % OffsetStride)
      return false;
    Offset /= OffsetStride;
  }
  return Offset <= 63 && Offset >= -64;
}

// Do alignment, specialized to power of 2 and for signed ints,
// avoiding having to do a C-style cast from uint64_t to int when
// using alignTo from include/llvm/Support/MathExtras.h.
// FIXME: Move this function to include/MathExtras.h?
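// For example, alignTo(5, 4) == 8 and alignTo(-5, 4) == -4; the result is
// always rounded up towards positive infinity.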
static int alignTo(int Num, int PowOf2) {
  return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}

static bool mayAlias(MachineInstr &MIa,
                     SmallVectorImpl<MachineInstr *> &MemInsns,
                     AliasAnalysis *AA) {
  for (MachineInstr *MIb : MemInsns)
    if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false))
      return true;

  return false;
}

bool AArch64LoadStoreOpt::findMatchingStore(
    MachineBasicBlock::iterator I, unsigned Limit,
    MachineBasicBlock::iterator &StoreI) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator MBBI = I;
  MachineInstr &LoadMI = *I;
  Register BaseReg = getLdStBaseOp(LoadMI).getReg();

  // If the load is the first instruction in the block, there's obviously
  // not any matching store.
  if (MBBI == B)
    return false;

  // Track which register units have been modified and used between the first
  // insn and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  unsigned Count = 0;
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // If the load instruction reads directly from the address to which the
    // store instruction writes and the stored value is not modified, we can
    // promote the load. Since we do not handle stores with pre-/post-index,
    // it's unnecessary to check if BaseReg is modified by the store itself.
    // Also, we can't handle stores without an immediate offset operand,
    // since the offset operand might instead be the address of a global
    // variable.
    if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
        BaseReg == getLdStBaseOp(MI).getReg() && getLdStOffsetOp(MI).isImm() &&
        isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
        ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
      StoreI = MBBI;
      return true;
    }

    if (MI.isCall())
      return false;

    // Update modified / uses register units.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (!ModifiedRegUnits.available(BaseReg))
      return false;

    // If we encounter a store aliased with the load, return early.
    if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA*/ false))
      return false;
  } while (MBBI != B && Count < Limit);
  return false;
}

// Returns true if FirstMI and MI are candidates for merging or pairing.
// Otherwise, returns false.
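// For example, LDRSWui and LDRWui at adjacent offsets both map to LDRWui via
// getMatchingNonSExtOpcode, so the two loads are candidates, with SExtIdx
// recording which result must be sign-extended after pairing.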
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
                                       LdStPairFlags &Flags,
                                       const AArch64InstrInfo *TII) {
  // If this is volatile or if pairing is suppressed, not a candidate.
  if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
    return false;

  // We should have already checked FirstMI for pair suppression and volatility.
  assert(!FirstMI.hasOrderedMemoryRef() &&
         !TII->isLdStPairSuppressed(FirstMI) &&
         "FirstMI shouldn't get here if either of these checks are true.");

  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  // Opcodes match: nothing more to check.
  if (OpcA == OpcB)
    return true;

  // Try to match a sign-extended load/store with a zero-extended load/store.
  bool IsValidLdStrOpc, PairIsValidLdStrOpc;
  unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
  assert(IsValidLdStrOpc &&
         "Given Opc should be a Load or Store with an immediate");
  // OpcA will be the first instruction in the pair.
  if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
    Flags.setSExtIdx(NonSExtOpc == (unsigned)OpcA ? 1 : 0);
    return true;
  }

  // If the second instruction isn't even a mergable/pairable load/store, bail
  // out.
  if (!PairIsValidLdStrOpc)
    return false;

  // FIXME: We don't support merging narrow stores with mixed scaled/unscaled
  // offsets.
  if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
    return false;

  // Try to match an unscaled load/store with a scaled load/store.
  return TII->isUnscaledLdSt(OpcA) != TII->isUnscaledLdSt(OpcB) &&
         getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB);

  // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
}

static bool
canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
                 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
                 const TargetRegisterInfo *TRI) {
  if (!FirstMI.mayStore())
    return false;

  // Check if we can find an unused register which we can use to rename
  // the register used by the first load/store.
  auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
  MachineFunction &MF = *FirstMI.getParent()->getParent();
  if (!RegClass || !MF.getRegInfo().tracksLiveness())
    return false;

  auto RegToRename = getLdStRegOp(FirstMI).getReg();
  // For now, we only rename if the store operand gets killed at the store.
  if (!getLdStRegOp(FirstMI).isKill() &&
      !any_of(FirstMI.operands(),
              [TRI, RegToRename](const MachineOperand &MOP) {
                return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                       MOP.isImplicit() && MOP.isKill() &&
                       TRI->regsOverlap(RegToRename, MOP.getReg());
              })) {
    LLVM_DEBUG(dbgs() << "  Operand not killed at " << FirstMI << "\n");
    return false;
  }
  auto canRenameMOP = [TRI](const MachineOperand &MOP) {
    if (MOP.isReg()) {
      auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
      // Renaming registers with multiple disjunct sub-registers (e.g. the
      // result of a LD3) means that all sub-registers are renamed, potentially
      // impacting other instructions we did not check. Bail out.
      // Note that this relies on the structure of the AArch64 register file.
      // In particular, a subregister cannot be written without overwriting the
      // whole register.
      if (RegClass->HasDisjunctSubRegs) {
        LLVM_DEBUG(
            dbgs()
            << "  Cannot rename operands with multiple disjunct subregisters ("
            << MOP << ")\n");
        return false;
      }
    }
    return MOP.isImplicit() ||
           (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
  };

  bool FoundDef = false;

  // For each instruction between FirstMI and the previous def for RegToRename,
  // we
  // * check if we can rename RegToRename in this instruction
  // * collect the registers used and required register classes for
  //   RegToRename.
  std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
                                                           bool IsDef) {
    LLVM_DEBUG(dbgs() << "Checking " << MI << "\n");
    // Currently we do not try to rename across frame-setup instructions.
    if (MI.getFlag(MachineInstr::FrameSetup)) {
      LLVM_DEBUG(dbgs() << "  Cannot rename framesetup instructions currently ("
                        << MI << ")\n");
      return false;
    }

    UsedInBetween.accumulate(MI);

    // For a definition, check that we can rename the definition and exit the
    // loop.
    FoundDef = IsDef;

    // For defs, check if we can rename the first def of RegToRename.
    if (FoundDef) {
      // For some pseudo instructions, we might not generate code in the end
      // (e.g. KILL) and we would end up without a correct def for the rename
      // register.
      // TODO: This might be overly conservative and we could handle those cases
      // in multiple ways:
      //       1. Insert an extra copy, to materialize the def.
      //       2. Skip pseudo-defs until we find a non-pseudo def.
      if (MI.isPseudo()) {
        LLVM_DEBUG(dbgs() << "  Cannot rename pseudo instruction " << MI
                          << "\n");
        return false;
      }

      for (auto &MOP : MI.operands()) {
        if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||
            !TRI->regsOverlap(MOP.getReg(), RegToRename))
          continue;
        if (!canRenameMOP(MOP)) {
          LLVM_DEBUG(dbgs()
                     << "  Cannot rename " << MOP << " in " << MI << "\n");
          return false;
        }
        RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
      }
      return true;
    } else {
      for (auto &MOP : MI.operands()) {
        if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
            !TRI->regsOverlap(MOP.getReg(), RegToRename))
          continue;

        if (!canRenameMOP(MOP)) {
          LLVM_DEBUG(dbgs()
                     << "  Cannot rename " << MOP << " in " << MI << "\n");
          return false;
        }
        RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
      }
    }
    return true;
  };

  if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
    return false;

  if (!FoundDef) {
    LLVM_DEBUG(dbgs() << "  Did not find definition for register in BB\n");
    return false;
  }
  return true;
}

// Check if we can find a physical register for renaming. This register must:
// * not be defined up to FirstMI (checking DefinedInBB)
// * not be used between the MI and the defining instruction of the register to
//   rename (checked using UsedInBetween).
// * be available in all used register classes (checked using RequiredClasses).
static Optional<MCPhysReg> tryToFindRegisterToRename(
    MachineInstr &FirstMI, MachineInstr &MI, LiveRegUnits &DefinedInBB,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  auto &MF = *FirstMI.getParent()->getParent();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Checks if any sub- or super-register of PR is callee saved.
  auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
    return any_of(TRI->sub_and_superregs_inclusive(PR),
                  [&MF, TRI](MCPhysReg SubOrSuper) {
                    return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
                  });
  };

  // Check if PR or one of its sub- or super-registers can be used for all
  // required register classes.
  auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
    return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
      return any_of(TRI->sub_and_superregs_inclusive(PR),
                    [C, TRI](MCPhysReg SubOrSuper) {
                      return C == TRI->getMinimalPhysRegClass(SubOrSuper);
                    });
    });
  };

  auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
  for (const MCPhysReg &PR : *RegClass) {
    if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
        !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
        CanBeUsedForAllClasses(PR)) {
      DefinedInBB.addReg(PR);
      LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
                        << "\n");
      return {PR};
    }
  }
  LLVM_DEBUG(dbgs() << "No rename register found from "
                    << TRI->getRegClassName(RegClass) << "\n");
  return None;
}
1437 
1438 /// Scan the instructions looking for a load/store that can be combined with the
1439 /// current instruction into a wider equivalent or a load/store pair.
1441 AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
1442  LdStPairFlags &Flags, unsigned Limit,
1443  bool FindNarrowMerge) {
1444  MachineBasicBlock::iterator E = I->getParent()->end();
1446  MachineBasicBlock::iterator MBBIWithRenameReg;
1447  MachineInstr &FirstMI = *I;
1448  MBBI = next_nodbg(MBBI, E);
1449 
1450  bool MayLoad = FirstMI.mayLoad();
1451  bool IsUnscaled = TII->isUnscaledLdSt(FirstMI);
1452  Register Reg = getLdStRegOp(FirstMI).getReg();
1453  Register BaseReg = getLdStBaseOp(FirstMI).getReg();
1454  int Offset = getLdStOffsetOp(FirstMI).getImm();
1455  int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
1456  bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
1457 
1458  Optional<bool> MaybeCanRename = None;
1459  if (!EnableRenaming)
1460  MaybeCanRename = {false};
1461 
1463  LiveRegUnits UsedInBetween;
1464  UsedInBetween.init(*TRI);
1465 
1466  Flags.clearRenameReg();
1467 
1468  // Track which register units have been modified and used between the first
1469  // insn (inclusive) and the second insn.
1470  ModifiedRegUnits.clear();
1471  UsedRegUnits.clear();
1472 
1473  // Remember any instructions that read/write memory between FirstMI and MI.
1475 
1476  for (unsigned Count = 0; MBBI != E && Count < Limit;
1477  MBBI = next_nodbg(MBBI, E)) {
1478  MachineInstr &MI = *MBBI;
1479 
1480  UsedInBetween.accumulate(MI);
1481 
1482  // Don't count transient instructions towards the search limit since there
1483  // may be different numbers of them if e.g. debug information is present.
1484  if (!MI.isTransient())
1485  ++Count;
1486 
1487  Flags.setSExtIdx(-1);
1488  if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
1489  getLdStOffsetOp(MI).isImm()) {
1490  assert(MI.mayLoadOrStore() && "Expected memory operation.");
1491  // If we've found another instruction with the same opcode, check to see
1492  // if the base and offset are compatible with our starting instruction.
1493  // These instructions all have scaled immediate operands, so we just
1494  // check for +1/-1. Make sure to check the new instruction offset is
1495  // actually an immediate and not a symbolic reference destined for
1496  // a relocation.
1497  Register MIBaseReg = getLdStBaseOp(MI).getReg();
1498  int MIOffset = getLdStOffsetOp(MI).getImm();
1499  bool MIIsUnscaled = TII->isUnscaledLdSt(MI);
1500  if (IsUnscaled != MIIsUnscaled) {
1501  // We're trying to pair instructions that differ in how they are scaled.
1502  // If FirstMI is scaled then scale the offset of MI accordingly.
1503  // Otherwise, do the opposite (i.e., make MI's offset unscaled).
1504  int MemSize = TII->getMemScale(MI);
1505  if (MIIsUnscaled) {
1506  // If the unscaled offset isn't a multiple of the MemSize, we can't
1507  // pair the operations together: bail and keep looking.
1508  if (MIOffset % MemSize) {
1509  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1510  UsedRegUnits, TRI);
1511  MemInsns.push_back(&MI);
1512  continue;
1513  }
1514  MIOffset /= MemSize;
1515  } else {
1516  MIOffset *= MemSize;
1517  }
1518  }
1519 
1520  if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) ||
1521  (Offset + OffsetStride == MIOffset))) {
1522  int MinOffset = Offset < MIOffset ? Offset : MIOffset;
1523  if (FindNarrowMerge) {
1524  // If the alignment requirements of the scaled wide load/store
1525  // instruction can't express the offset of the scaled narrow input,
1526  // bail and keep looking. For promotable zero stores, allow only when
1527  // the stored value is the same (i.e., WZR).
1528  if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
1529  (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
1530  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1531  UsedRegUnits, TRI);
1532  MemInsns.push_back(&MI);
1533  continue;
1534  }
1535  } else {
1536  // Pairwise instructions have a 7-bit signed offset field. Single
1537  // insns have a 12-bit unsigned offset field. If the resultant
1538  // immediate offset of merging these instructions is out of range for
1539  // a pairwise instruction, bail and keep looking.
1540  if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
1541  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1542  UsedRegUnits, TRI);
1543  MemInsns.push_back(&MI);
1544  continue;
1545  }
1546  // If the alignment requirements of the paired (scaled) instruction
1547  // can't express the offset of the unscaled input, bail and keep
1548  // looking.
1549  if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
1550  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1551  UsedRegUnits, TRI);
1552  MemInsns.push_back(&MI);
1553  continue;
1554  }
1555  }
1556  // If the destination register of one load is the same register or a
1557  // sub/super register of the other load, bail and keep looking. A
1558  // load-pair instruction with both destination registers the same is
1559  // UNPREDICTABLE and will result in an exception.
1560  if (MayLoad &&
1562  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
1563  TRI);
1564  MemInsns.push_back(&MI);
1565  continue;
1566  }
1567 
1568  // If the BaseReg has been modified, then we cannot do the optimization.
1569  // For example, in the following pattern
1570  // ldr x1 [x2]
1571  // ldr x2 [x3]
1572  // ldr x4 [x2, #8],
1573  // the first and third ldr cannot be converted to ldp x1, x4, [x2]
1574  if (!ModifiedRegUnits.available(BaseReg))
1575  return E;
1576 
1577  // If the Rt of the second instruction was not modified or used between
1578  // the two instructions and none of the instructions between the second
1579  // and first alias with the second, we can combine the second into the
1580  // first.
1581  if (ModifiedRegUnits.available(getLdStRegOp(MI).getReg()) &&
1582  !(MI.mayLoad() &&
1583  !UsedRegUnits.available(getLdStRegOp(MI).getReg())) &&
1584  !mayAlias(MI, MemInsns, AA)) {
1585 
1586  Flags.setMergeForward(false);
1587  Flags.clearRenameReg();
1588  return MBBI;
1589  }
1590 
1591  // Likewise, if the Rt of the first instruction is not modified or used
1592  // between the two instructions and none of the instructions between the
1593  // first and the second alias with the first, we can combine the first
1594  // into the second.
1595  if (!(MayLoad &&
1596  !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) &&
1597  !mayAlias(FirstMI, MemInsns, AA)) {
1598 
1599  if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
1600  Flags.setMergeForward(true);
1601  Flags.clearRenameReg();
1602  return MBBI;
1603  }
1604 
1605  if (DebugCounter::shouldExecute(RegRenamingCounter)) {
1606  if (!MaybeCanRename)
1607  MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween,
1608  RequiredClasses, TRI)};
1609 
1610  if (*MaybeCanRename) {
1612  FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses,
1613  TRI);
1614  if (MaybeRenameReg) {
1615  Flags.setRenameReg(*MaybeRenameReg);
1616  Flags.setMergeForward(true);
1617  MBBIWithRenameReg = MBBI;
1618  }
1619  }
1620  }
1621  }
1622  // Unable to combine these instructions due to interference in between.
1623  // Keep looking.
1624  }
1625  }
1626 
1627  if (Flags.getRenameReg())
1628  return MBBIWithRenameReg;
1629 
1630  // If the instruction wasn't a matching load or store. Stop searching if we
1631  // encounter a call instruction that might modify memory.
1632  if (MI.isCall())
1633  return E;
1634 
1635  // Update modified / uses register units.
1636  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1637 
1638  // Otherwise, if the base register is modified, we have no match, so
1639  // return early.
1640  if (!ModifiedRegUnits.available(BaseReg))
1641  return E;
1642 
1643  // Update list of instructions that read/write memory.
1644  if (MI.mayLoadOrStore())
1645  MemInsns.push_back(&MI);
1646  }
1647  return E;
1648 }
1649 
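// Fold the add/sub in Update into the memory access at I, rewriting it to
// the matching pre- or post-indexed (base writeback) form, erasing both
// original instructions, and returning an iterator to the instruction
// following the merged one.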
1650 MachineBasicBlock::iterator
1651 AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
1652  MachineBasicBlock::iterator Update,
1653  bool IsPreIdx) {
1654  assert((Update->getOpcode() == AArch64::ADDXri ||
1655  Update->getOpcode() == AArch64::SUBXri) &&
1656  "Unexpected base register update instruction to merge!");
1657  MachineBasicBlock::iterator E = I->getParent()->end();
1658  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
1659  // Return the instruction following the merged instruction, which is
1660  // the instruction following our unmerged load, unless that's the add/sub
1661  // instruction we're merging, in which case it's the one after that.
1662  if (NextI == Update)
1663  NextI = next_nodbg(NextI, E);
1664 
1665  int Value = Update->getOperand(2).getImm();
1666  assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
1667  "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
1668  if (Update->getOpcode() == AArch64::SUBXri)
1669  Value = -Value;
1670 
1671  unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
1672  : getPostIndexedOpcode(I->getOpcode());
1673  MachineInstrBuilder MIB;
1674  int Scale, MinOffset, MaxOffset;
1675  getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
1676  if (!isPairedLdSt(*I)) {
1677  // Non-paired instruction.
1678  MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
1679  .add(getLdStRegOp(*Update))
1680  .add(getLdStRegOp(*I))
1681  .add(getLdStBaseOp(*I))
1682  .addImm(Value / Scale)
1683  .setMemRefs(I->memoperands())
1684  .setMIFlags(I->mergeFlagsWith(*Update));
1685  } else {
1686  // Paired instruction.
1687  MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
1688  .add(getLdStRegOp(*Update))
1689  .add(getLdStRegOp(*I, 0))
1690  .add(getLdStRegOp(*I, 1))
1691  .add(getLdStBaseOp(*I))
1692  .addImm(Value / Scale)
1693  .setMemRefs(I->memoperands())
1694  .setMIFlags(I->mergeFlagsWith(*Update));
1695  }
1696  (void)MIB;
1697 
1698  if (IsPreIdx) {
1699  ++NumPreFolded;
1700  LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store.");
1701  } else {
1702  ++NumPostFolded;
1703  LLVM_DEBUG(dbgs() << "Creating post-indexed load/store.");
1704  }
1705  LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
1706  LLVM_DEBUG(I->print(dbgs()));
1707  LLVM_DEBUG(dbgs() << " ");
1708  LLVM_DEBUG(Update->print(dbgs()));
1709  LLVM_DEBUG(dbgs() << " with instruction:\n ");
1710  LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1711  LLVM_DEBUG(dbgs() << "\n");
1712 
1713  // Erase the old instructions from the block.
1714  I->eraseFromParent();
1715  Update->eraseFromParent();
1716 
1717  return NextI;
1718 }
1719 
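// Return true if MI is an add/sub immediate of BaseReg that can be folded
// into the memory access MemMI: a plain (unshifted) immediate that is a
// multiple of the access's scale, fits the pre/post-index immediate range,
// and equals Offset when a non-zero Offset is requested.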
1720 bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
1721  MachineInstr &MI,
1722  unsigned BaseReg, int Offset) {
1723  switch (MI.getOpcode()) {
1724  default:
1725  break;
1726  case AArch64::SUBXri:
1727  case AArch64::ADDXri:
1728  // Make sure it's a vanilla immediate operand, not a relocation or
1729  // anything else we can't handle.
1730  if (!MI.getOperand(2).isImm())
1731  break;
1732  // Watch out for 1 << 12 shifted value.
1733  if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
1734  break;
1735 
1736  // The update instruction source and destination register must be the
1737  // same as the load/store base register.
1738  if (MI.getOperand(0).getReg() != BaseReg ||
1739  MI.getOperand(1).getReg() != BaseReg)
1740  break;
1741 
1742  int UpdateOffset = MI.getOperand(2).getImm();
1743  if (MI.getOpcode() == AArch64::SUBXri)
1744  UpdateOffset = -UpdateOffset;
1745 
1746  // The immediate must be a multiple of the scaling factor of the pre/post
1747  // indexed instruction.
1748  int Scale, MinOffset, MaxOffset;
1749  getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
1750  if (UpdateOffset % Scale != 0)
1751  break;
1752 
1753  // Scaled offset must fit in the instruction immediate.
1754  int ScaledOffset = UpdateOffset / Scale;
1755  if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
1756  break;
1757 
1758  // A zero Offset acts as a wildcard; otherwise, check that the requested
1759  // Offset matches the amount we're adding to the register.
1760  if (!Offset || Offset == UpdateOffset)
1761  return true;
1762  break;
1763  }
1764  return false;
1765 }
1766 
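// Folding SP updates on Windows CFI targets would require keeping the
// unwind info in sync, so callers below bail out when this returns true.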
1767 static bool needsWinCFI(const MachineFunction *MF) {
1768  return MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
1769  MF->getFunction().needsUnwindTableEntry();
1770 }
1771 
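// Scan forward from the memory access at I for an add/sub of the base
// register whose immediate matches UnscaledOffset, i.e. an update that can
// be folded as a post-index (or, for a non-zero offset, pre-index)
// writeback. Returns the update instruction, or the end iterator if none
// is found within Limit non-transient instructions.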
1772 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
1773  MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
1774  MachineBasicBlock::iterator E = I->getParent()->end();
1775  MachineInstr &MemMI = *I;
1777 
1778  Register BaseReg = getLdStBaseOp(MemMI).getReg();
1779  int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * TII->getMemScale(MemMI);
1780 
1781  // Scan forward looking for post-index opportunities. Updating instructions
1782  // can't be formed if the memory instruction doesn't have the offset we're
1783  // looking for.
1784  if (MIUnscaledOffset != UnscaledOffset)
1785  return E;
1786 
1787  // If the base register overlaps a source/destination register, we can't
1788  // merge the update. This does not apply to tag store instructions, which
1789  // ignore the address part of the source register.
1790  // Nor does it apply to STGPi which, unlike normal stores, has no
1791  // unpredictable behavior in this case and always performs writeback
1792  // after reading the source register value.
1793  if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
1794  bool IsPairedInsn = isPairedLdSt(MemMI);
1795  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
1796  Register DestReg = getLdStRegOp(MemMI, i).getReg();
1797  if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
1798  return E;
1799  }
1800  }
1801 
1802  // Track which register units have been modified and used between the first
1803  // insn (inclusive) and the second insn.
1804  ModifiedRegUnits.clear();
1805  UsedRegUnits.clear();
1806  MBBI = next_nodbg(MBBI, E);
1807 
1808  // We can't post-increment the stack pointer if any instruction between
1809  // the memory access (I) and the increment (MBBI) can access the memory
1810  // region defined by [SP, MBBI].
1811  const bool BaseRegSP = BaseReg == AArch64::SP;
1812  if (BaseRegSP && needsWinCFI(I->getMF())) {
1813  // FIXME: For now, we always block the optimization over SP on Windows
1814  // targets, as it would require adjusting the unwind/debug info; getting
1815  // the unwind info wrong can actually cause a miscompile.
1816  return E;
1817  }
1818 
1819  for (unsigned Count = 0; MBBI != E && Count < Limit;
1820  MBBI = next_nodbg(MBBI, E)) {
1821  MachineInstr &MI = *MBBI;
1822 
1823  // Don't count transient instructions towards the search limit since there
1824  // may be different numbers of them if e.g. debug information is present.
1825  if (!MI.isTransient())
1826  ++Count;
1827 
1828  // If we found a match, return it.
1829  if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
1830  return MBBI;
1831 
1832  // Update the status of what the instruction clobbered and used.
1833  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1834 
1835  // Otherwise, if the base register is used or modified, we have no match, so
1836  // return early.
1837  // If we are optimizing SP, do not allow instructions that may load or store
1838  // in between the load and the optimized value update.
1839  if (!ModifiedRegUnits.available(BaseReg) ||
1840  !UsedRegUnits.available(BaseReg) ||
1841  (BaseRegSP && MBBI->mayLoadOrStore()))
1842  return E;
1843  }
1844  return E;
1845 }
1846 
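// Scan backwards from the memory access at I for an add/sub of the base
// register that can be folded into it as a pre-index writeback, e.g. an
// "add x0, x0, #8" feeding "ldr x1, [x0]". Returns the update instruction,
// or the end iterator if none is found within Limit non-transient
// instructions.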
1847 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
1848  MachineBasicBlock::iterator I, unsigned Limit) {
1849  MachineBasicBlock::iterator B = I->getParent()->begin();
1850  MachineBasicBlock::iterator E = I->getParent()->end();
1851  MachineInstr &MemMI = *I;
1852  MachineBasicBlock::iterator MBBI = I;
1853  MachineFunction &MF = *MemMI.getMF();
1854 
1855  Register BaseReg = getLdStBaseOp(MemMI).getReg();
1856  int Offset = getLdStOffsetOp(MemMI).getImm();
1857 
1858  // If the load/store is the first instruction in the block, there's obviously
1859  // no matching update. Ditto if the memory offset isn't zero.
1860  if (MBBI == B || Offset != 0)
1861  return E;
1862  // If the base register overlaps a destination register, we can't
1863  // merge the update.
1864  if (!isTagStore(MemMI)) {
1865  bool IsPairedInsn = isPairedLdSt(MemMI);
1866  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
1867  Register DestReg = getLdStRegOp(MemMI, i).getReg();
1868  if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
1869  return E;
1870  }
1871  }
1872 
1873  const bool BaseRegSP = BaseReg == AArch64::SP;
1874  if (BaseRegSP && needsWinCFI(I->getMF())) {
1875  // FIXME: For now, we always block the optimization over SP on Windows
1876  // targets, as it would require adjusting the unwind/debug info; getting
1877  // the unwind info wrong can actually cause a miscompile.
1878  return E;
1879  }
1880 
1881  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1882  unsigned RedZoneSize =
1883  Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());
1884 
1885  // Track which register units have been modified and used between the first
1886  // insn (inclusive) and the second insn.
1887  ModifiedRegUnits.clear();
1888  UsedRegUnits.clear();
1889  unsigned Count = 0;
1890  bool MemAccessBeforeSPPreInc = false;
1891  do {
1892  MBBI = prev_nodbg(MBBI, B);
1893  MachineInstr &MI = *MBBI;
1894 
1895  // Don't count transient instructions towards the search limit since there
1896  // may be different numbers of them if e.g. debug information is present.
1897  if (!MI.isTransient())
1898  ++Count;
1899 
1900  // If we found a match, return it.
1901  if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
1902  // Check that the update value is within our red zone limit (which may be
1903  // zero).
1904  if (MemAccessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)
1905  return E;
1906  return MBBI;
1907  }
1908 
1909  // Update the status of what the instruction clobbered and used.
1910  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1911 
1912  // Otherwise, if the base register is used or modified, we have no match, so
1913  // return early.
1914  if (!ModifiedRegUnits.available(BaseReg) ||
1915  !UsedRegUnits.available(BaseReg))
1916  return E;
1917  // Keep track of whether we have a memory access before an SP pre-increment;
1918  // in that case we must validate later that the update amount respects the
1919  // red zone.
1920  if (BaseRegSP && MBBI->mayLoadOrStore())
1921  MemAccessBeforeSPPreInc = true;
1922  } while (MBBI != B && Count < Limit);
1923  return E;
1924 }
1925 
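// Try to replace a load that reads from an earlier store with a register
// move (or a bitfield extract when the store is wider than the load); see
// the str/ldrh example in optimizeBlock below.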
1926 bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
1927  MachineBasicBlock::iterator &MBBI) {
1928  MachineInstr &MI = *MBBI;
1929  // If this is a volatile or otherwise ordered load, don't mess with it.
1930  if (MI.hasOrderedMemoryRef())
1931  return false;
1932 
1933  // Make sure this is a reg+imm.
1934  // FIXME: It is possible to extend it to handle reg+reg cases.
1935  if (!getLdStOffsetOp(MI).isImm())
1936  return false;
1937 
1938  // Look backward up to LdStLimit instructions.
1939  MachineBasicBlock::iterator StoreI;
1940  if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
1941  ++NumLoadsFromStoresPromoted;
1942  // Promote the load. Keeping the iterator straight is a
1943  // pain, so we let the merge routine tell us what the next instruction
1944  // is after it's done mucking about.
1945  MBBI = promoteLoadFromStore(MBBI, StoreI);
1946  return true;
1947  }
1948  return false;
1949 }
1950 
1951 // Merge adjacent zero stores into a wider store.
1952 bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
1953  MachineBasicBlock::iterator &MBBI) {
1954  assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
1955  MachineInstr &MI = *MBBI;
1956  MachineBasicBlock::iterator E = MI.getParent()->end();
1957 
1958  if (!TII->isCandidateToMergeOrPair(MI))
1959  return false;
1960 
1961  // Look ahead up to LdStLimit instructions for a mergable instruction.
1962  LdStPairFlags Flags;
1963  MachineBasicBlock::iterator MergeMI =
1964  findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
1965  if (MergeMI != E) {
1966  ++NumZeroStoresPromoted;
1967 
1968  // Keeping the iterator straight is a pain, so we let the merge routine tell
1969  // us what the next instruction is after it's done mucking about.
1970  MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
1971  return true;
1972  }
1973  return false;
1974 }
1975 
1976 // Find loads and stores that can be merged into a single load or store pair
1977 // instruction.
1978 bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
1979  MachineInstr &MI = *MBBI;
1980  MachineBasicBlock::iterator E = MI.getParent()->end();
1981 
1982  if (!TII->isCandidateToMergeOrPair(MI))
1983  return false;
1984 
1985  // Early exit if the offset is not possible to match. (6 bits of positive
1986  // range, plus we allow one extra stride in case we find a later insn that
1987  // matches with Offset-1.)
1988  bool IsUnscaled = TII->isUnscaledLdSt(MI);
1989  int Offset = getLdStOffsetOp(MI).getImm();
1990  int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
1991  // Allow one more for offset.
1992  if (Offset > 0)
1993  Offset -= OffsetStride;
1994  if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
1995  return false;
1996 
1997  // Look ahead up to LdStLimit instructions for a pairable instruction.
1998  LdStPairFlags Flags;
1999  MachineBasicBlock::iterator Paired =
2000  findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
2001  if (Paired != E) {
2002  ++NumPairCreated;
2003  if (TII->isUnscaledLdSt(MI))
2004  ++NumUnscaledPairCreated;
2005  // Keeping the iterator straight is a pain, so we let the merge routine tell
2006  // us what the next instruction is after it's done mucking about.
2007  auto Prev = std::prev(MBBI);
2008  MBBI = mergePairedInsns(MBBI, Paired, Flags);
2009  // Collect liveness info for instructions between Prev and the new position
2010  // MBBI.
2011  for (auto I = std::next(Prev); I != MBBI; I++)
2012  updateDefinedRegisters(*I, DefinedInBB, TRI);
2013 
2014  return true;
2015  }
2016  return false;
2017 }
2018 
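// Try to fold a base-register add/sub into the load/store at MBBI: first as
// a post-index update found by scanning forward, then as a pre-index update
// found by scanning backwards, and finally as a pre-index update found by
// scanning forward from a load/store that already carries the offset.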
2019 bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
2020  (MachineBasicBlock::iterator &MBBI) {
2021  MachineInstr &MI = *MBBI;
2022  MachineBasicBlock::iterator E = MI.getParent()->end();
2023  MachineBasicBlock::iterator Update;
2024 
2025  // Look forward to try to form a post-index instruction. For example,
2026  // ldr x0, [x20]
2027  // add x20, x20, #32
2028  // merged into:
2029  // ldr x0, [x20], #32
2030  Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
2031  if (Update != E) {
2032  // Merge the update into the ld/st.
2033  MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
2034  return true;
2035  }
2036 
2037  // Don't know how to handle unscaled pre/post-index versions below, so bail.
2038  if (TII->isUnscaledLdSt(MI.getOpcode()))
2039  return false;
2040 
2041  // Look back to try to find a pre-index instruction. For example,
2042  // add x0, x0, #8
2043  // ldr x1, [x0]
2044  // merged into:
2045  // ldr x1, [x0, #8]!
2046  Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
2047  if (Update != E) {
2048  // Merge the update into the ld/st.
2049  MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
2050  return true;
2051  }
2052 
2053  // The immediate in the load/store is scaled by the size of the memory
2054  // operation. The immediate in the add we're looking for,
2055  // however, is not, so adjust here.
2056  int UnscaledOffset = getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);
2057 
2058  // Look forward to try to find a pre-index instruction. For example,
2059  // ldr x1, [x0, #64]
2060  // add x0, x0, #64
2061  // merged into:
2062  // ldr x1, [x0, #64]!
2063  Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
2064  if (Update != E) {
2065  // Merge the update into the ld/st.
2066  MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
2067  return true;
2068  }
2069 
2070  return false;
2071 }
2072 
2073 bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
2074  bool EnableNarrowZeroStOpt) {
2075 
2076  bool Modified = false;
2077  // Four transformations to do here:
2078  // 1) Find loads that directly read from stores and promote them by
2079  // replacing with mov instructions. If the store is wider than the load,
2080  // the load will be replaced with a bitfield extract.
2081  // e.g.,
2082  // str w1, [x0, #4]
2083  // ldrh w2, [x0, #6]
2084  // ; becomes
2085  // str w1, [x0, #4]
2086  // lsr w2, w1, #16
2087  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2088  MBBI != E;) {
2089  if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
2090  Modified = true;
2091  else
2092  ++MBBI;
2093  }
2094  // 2) Merge adjacent zero stores into a wider store.
2095  // e.g.,
2096  // strh wzr, [x0]
2097  // strh wzr, [x0, #2]
2098  // ; becomes
2099  // str wzr, [x0]
2100  // e.g.,
2101  // str wzr, [x0]
2102  // str wzr, [x0, #4]
2103  // ; becomes
2104  // str xzr, [x0]
2105  if (EnableNarrowZeroStOpt)
2106  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2107  MBBI != E;) {
2108  if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
2109  Modified = true;
2110  else
2111  ++MBBI;
2112  }
2113  // 3) Find loads and stores that can be merged into a single load or store
2114  // pair instruction.
2115  // e.g.,
2116  // ldr x0, [x2]
2117  // ldr x1, [x2, #8]
2118  // ; becomes
2119  // ldp x0, x1, [x2]
2120 
2121  if (MBB.getParent()->getRegInfo().tracksLiveness()) {
2122  DefinedInBB.clear();
2123  DefinedInBB.addLiveIns(MBB);
2124  }
2125 
2126  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2127  MBBI != E;) {
2128  // Track currently live registers up to this point, to help with
2129  // searching for a rename register on demand.
2130  updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
2131  if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
2132  Modified = true;
2133  else
2134  ++MBBI;
2135  }
2136  // 4) Find base register updates that can be merged into the load or store
2137  // as a base-reg writeback.
2138  // e.g.,
2139  // ldr x0, [x2]
2140  // add x2, x2, #4
2141  // ; becomes
2142  // ldr x0, [x2], #4
2143  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2144  MBBI != E;) {
2145  if (isMergeableLdStUpdate(*MBBI) && tryToMergeLdStUpdate(MBBI))
2146  Modified = true;
2147  else
2148  ++MBBI;
2149  }
2150 
2151  return Modified;
2152 }
2153 
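// Pass entry point: cache the subtarget hooks, size the register-unit
// trackers once per function, and run optimizeBlock over every block.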
2154 bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2155  if (skipFunction(Fn.getFunction()))
2156  return false;
2157 
2158  Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
2159  TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
2160  TRI = Subtarget->getRegisterInfo();
2161  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2162 
2163  // Resize the modified and used register unit trackers. We do this once
2164  // per function and then clear the register units each time we optimize a load
2165  // or store.
2166  ModifiedRegUnits.init(*TRI);
2167  UsedRegUnits.init(*TRI);
2168  DefinedInBB.init(*TRI);
2169 
2170  bool Modified = false;
2171  bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
2172  for (auto &MBB : Fn) {
2173  auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
2174  Modified |= M;
2175  }
2176 
2177  return Modified;
2178 }
2179 
2180 // FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads and
2181 // stores near one another? Note: The pre-RA instruction scheduler already has
2182 // hooks to try and schedule pairable loads/stores together to improve pairing
2183  // opportunities. Thus, a pre-RA pairing pass may not be worth the effort.
2184 
2185 // FIXME: When pairing store instructions it's very possible for this pass to
2186 // hoist a store with a KILL marker above another use (without a KILL marker).
2187 // The resulting IR is invalid, but nothing uses the KILL markers after this
2188  // pass, so this has never caused a problem in practice.
2189 
2190 /// createAArch64LoadStoreOptimizationPass - returns an instance of the
2191 /// load / store optimization pass.
2192 FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
2193  return new AArch64LoadStoreOpt();
2194 }