1 //===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that performs load / store related peephole
10 // optimizations. This pass should be run after register allocation.
11 //
12 // The pass runs after the PrologEpilogInserter where we emit the CFI
13 // instructions. In order to preserve the correctness of the unwind information,
14 // the pass should not change the relative order of any two instructions if at
15 // least one of them has the FrameSetup/FrameDestroy flag, or, alternatively, it
16 // should apply an ad-hoc fix to the unwind information.
17 //
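//
// Illustrative examples of the rewrites this pass performs (sketches, not
// exhaustive):
//
//   Pairing:            ldr  w0, [x2]          =>   ldp  w0, w1, [x2]
//                       ldr  w1, [x2, #4]
//
//   Pre-index update:   add  x2, x2, #16       =>   str  x0, [x2, #16]!
//                       str  x0, [x2]
//
//   Post-index update:  str  x0, [x2]          =>   str  x0, [x2], #16
//                       add  x2, x2, #16
//
//   Zero-store merge:   strh wzr, [x0]         =>   str  wzr, [x0]
//                       strh wzr, [x0, #2]
//
//   Load-from-store:    str  w0, [x1]          =>   str  w0, [x1]
//                       ldr  w2, [x1]               mov  w2, w0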
18 //===----------------------------------------------------------------------===//
19 
20 #include "AArch64InstrInfo.h"
21 #include "AArch64MachineFunctionInfo.h"
22 #include "AArch64Subtarget.h"
23 #include "MCTargetDesc/AArch64AddressingModes.h"
24 #include "llvm/ADT/BitVector.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/ADT/iterator_range.h"
29 #include "llvm/Analysis/AliasAnalysis.h"
30 #include "llvm/CodeGen/MachineBasicBlock.h"
31 #include "llvm/CodeGen/MachineFunction.h"
32 #include "llvm/CodeGen/MachineFunctionPass.h"
33 #include "llvm/CodeGen/MachineInstr.h"
34 #include "llvm/CodeGen/MachineInstrBuilder.h"
35 #include "llvm/CodeGen/MachineOperand.h"
36 #include "llvm/CodeGen/MachineRegisterInfo.h"
37 #include "llvm/CodeGen/TargetRegisterInfo.h"
38 #include "llvm/IR/DebugLoc.h"
39 #include "llvm/MC/MCAsmInfo.h"
40 #include "llvm/MC/MCDwarf.h"
41 #include "llvm/MC/MCRegisterInfo.h"
42 #include "llvm/Pass.h"
43 #include "llvm/Support/CommandLine.h"
44 #include "llvm/Support/Debug.h"
45 #include "llvm/Support/DebugCounter.h"
46 #include "llvm/Support/ErrorHandling.h"
47 #include "llvm/Support/raw_ostream.h"
48 #include <cassert>
49 #include <cstdint>
50 #include <functional>
51 #include <iterator>
52 #include <limits>
53 
54 using namespace llvm;
55 
56 #define DEBUG_TYPE "aarch64-ldst-opt"
57 
58 STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
59 STATISTIC(NumPostFolded, "Number of post-index updates folded");
60 STATISTIC(NumPreFolded, "Number of pre-index updates folded");
61 STATISTIC(NumUnscaledPairCreated,
62  "Number of load/store pairs generated from unscaled load/stores");
63 STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
64 STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
65 
66 DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
67  "Controls which pairs are considered for renaming");
68 
69 // The LdStLimit limits how far we search for load/store pairs.
70 static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
71  cl::init(20), cl::Hidden);
72 
73 // The UpdateLimit limits how far we search for update instructions when we form
74 // pre-/post-index instructions.
75 static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
76  cl::Hidden);
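// Both limits are plain cl::opt flags, so they can be tuned from the command
// line, e.g. (hypothetical invocation): llc -aarch64-load-store-scan-limit=40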
77 
78 // Enable register renaming to find additional store pairing opportunities.
79 static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
80  cl::init(true), cl::Hidden);
81 
82 #define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
83 
84 namespace {
85 
86 using LdStPairFlags = struct LdStPairFlags {
87  // If a matching instruction is found, MergeForward is set to true if the
88  // merge is to remove the first instruction and replace the second with
89  // a pair-wise insn, and false if the reverse is true.
90  bool MergeForward = false;
91 
92  // SExtIdx gives the index of the result of the load pair that must be
93  // extended. The value of SExtIdx assumes that the paired load produces the
94  // value in this order: (I, returned iterator), i.e., -1 means no value has
95  // to be extended, 0 means I, and 1 means the returned iterator.
96  int SExtIdx = -1;
97 
98  // If not none, RenameReg can be used to rename the result register of the
99  // first store in a pair. Currently this only works when merging stores
100  // forward.
101  Optional<MCPhysReg> RenameReg = None;
102 
103  LdStPairFlags() = default;
104 
105  void setMergeForward(bool V = true) { MergeForward = V; }
106  bool getMergeForward() const { return MergeForward; }
107 
108  void setSExtIdx(int V) { SExtIdx = V; }
109  int getSExtIdx() const { return SExtIdx; }
110 
111  void setRenameReg(MCPhysReg R) { RenameReg = R; }
112  void clearRenameReg() { RenameReg = None; }
113  Optional<MCPhysReg> getRenameReg() const { return RenameReg; }
114 };
115 
116 struct AArch64LoadStoreOpt : public MachineFunctionPass {
117  static char ID;
118 
119  AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
120  initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
121  }
122 
123  AliasAnalysis *AA;
124  const AArch64InstrInfo *TII;
125  const TargetRegisterInfo *TRI;
126  const AArch64Subtarget *Subtarget;
127 
128  // Track which register units have been modified and used.
129  LiveRegUnits ModifiedRegUnits, UsedRegUnits;
130  LiveRegUnits DefinedInBB;
131 
132  void getAnalysisUsage(AnalysisUsage &AU) const override {
133  AU.addRequired<AAResultsWrapperPass>();
134  MachineFunctionPass::getAnalysisUsage(AU);
135  }
136 
137  // Scan the instructions looking for a load/store that can be combined
138  // with the current instruction into a load/store pair.
139  // Return the matching instruction if one is found, else MBB->end().
140  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
141  LdStPairFlags &Flags,
142  unsigned Limit,
143  bool FindNarrowMerge);
144 
145  // Scan the instructions looking for a store that writes to the address from
146  // which the current load instruction reads. Return true if one is found.
147  bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
148  MachineBasicBlock::iterator &StoreI);
149
150  // Merge the two narrow zero stores indicated into a single wider store.
151  MachineBasicBlock::iterator
152  mergeNarrowZeroStores(MachineBasicBlock::iterator I,
153  MachineBasicBlock::iterator MergeMI,
154  const LdStPairFlags &Flags);
155 
156  // Merge the two instructions indicated into a single pair-wise instruction.
157  MachineBasicBlock::iterator
158  mergePairedInsns(MachineBasicBlock::iterator I,
159  MachineBasicBlock::iterator Paired,
160  const LdStPairFlags &Flags);
161 
162  // Promote the load that reads directly from the address stored to.
163  MachineBasicBlock::iterator
164  promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
165  MachineBasicBlock::iterator StoreI);
166
167  // Scan the instruction list to find a base register update that can
168  // be combined with the current instruction (a load or store) using
169  // pre or post indexed addressing with writeback. Scan forwards.
170  MachineBasicBlock::iterator
171  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
172  int UnscaledOffset, unsigned Limit);
173 
174  // Scan the instruction list to find a base register update that can
175  // be combined with the current instruction (a load or store) using
176  // pre or post indexed addressing with writeback. Scan backwards.
177  MachineBasicBlock::iterator
178  findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);
179 
180  // Find an instruction that updates the base register of the ld/st
181  // instruction.
182  bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
183  unsigned BaseReg, int Offset);
184 
185  // Merge a pre- or post-index base register update into a ld/st instruction.
186  MachineBasicBlock::iterator
187  mergeUpdateInsn(MachineBasicBlock::iterator I,
188  MachineBasicBlock::iterator Update, bool IsPreIdx);
189 
190  // Find and merge zero store instructions.
191  bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);
192 
193  // Find and pair ldr/str instructions.
194  bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
195 
196  // Find and promote load instructions which read directly from a store.
197  bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
198 
199  // Find and merge base register updates before or after a ld/st instruction.
200  bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);
201 
202  bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
203 
204  bool runOnMachineFunction(MachineFunction &Fn) override;
205 
206  MachineFunctionProperties getRequiredProperties() const override {
207  return MachineFunctionProperties().set(
208  MachineFunctionProperties::Property::NoVRegs);
209  }
210 
211  StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
212 };
213 
214 char AArch64LoadStoreOpt::ID = 0;
215 
216 } // end anonymous namespace
217 
218 INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
219  AARCH64_LOAD_STORE_OPT_NAME, false, false)
220 
221 static bool isNarrowStore(unsigned Opc) {
222  switch (Opc) {
223  default:
224  return false;
225  case AArch64::STRBBui:
226  case AArch64::STURBBi:
227  case AArch64::STRHHui:
228  case AArch64::STURHHi:
229  return true;
230  }
231 }
232 
233 // These instructions set the memory tag and either keep the memory contents
234 // unchanged or set them to zero, ignoring the address part of the source register.
235 static bool isTagStore(const MachineInstr &MI) {
236  switch (MI.getOpcode()) {
237  default:
238  return false;
239  case AArch64::STGOffset:
240  case AArch64::STZGOffset:
241  case AArch64::ST2GOffset:
242  case AArch64::STZ2GOffset:
243  return true;
244  }
245 }
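// For example (illustrative), "stg x0, [x1, #16]" (STGOffset) writes the
// allocation tag for the granule at x1+16; the STZG forms also zero the memory.
// These are MTE (Memory Tagging Extension) stores.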
246 
247 static unsigned getMatchingNonSExtOpcode(unsigned Opc,
248  bool *IsValidLdStrOpc = nullptr) {
249  if (IsValidLdStrOpc)
250  *IsValidLdStrOpc = true;
251  switch (Opc) {
252  default:
253  if (IsValidLdStrOpc)
254  *IsValidLdStrOpc = false;
255  return std::numeric_limits<unsigned>::max();
256  case AArch64::STRDui:
257  case AArch64::STURDi:
258  case AArch64::STRDpre:
259  case AArch64::STRQui:
260  case AArch64::STURQi:
261  case AArch64::STRQpre:
262  case AArch64::STRBBui:
263  case AArch64::STURBBi:
264  case AArch64::STRHHui:
265  case AArch64::STURHHi:
266  case AArch64::STRWui:
267  case AArch64::STRWpre:
268  case AArch64::STURWi:
269  case AArch64::STRXui:
270  case AArch64::STRXpre:
271  case AArch64::STURXi:
272  case AArch64::LDRDui:
273  case AArch64::LDURDi:
274  case AArch64::LDRDpre:
275  case AArch64::LDRQui:
276  case AArch64::LDURQi:
277  case AArch64::LDRQpre:
278  case AArch64::LDRWui:
279  case AArch64::LDURWi:
280  case AArch64::LDRWpre:
281  case AArch64::LDRXui:
282  case AArch64::LDURXi:
283  case AArch64::LDRXpre:
284  case AArch64::STRSui:
285  case AArch64::STURSi:
286  case AArch64::STRSpre:
287  case AArch64::LDRSui:
288  case AArch64::LDURSi:
289  case AArch64::LDRSpre:
290  return Opc;
291  case AArch64::LDRSWui:
292  return AArch64::LDRWui;
293  case AArch64::LDURSWi:
294  return AArch64::LDURWi;
295  }
296 }
297 
298 static unsigned getMatchingWideOpcode(unsigned Opc) {
299  switch (Opc) {
300  default:
301  llvm_unreachable("Opcode has no wide equivalent!");
302  case AArch64::STRBBui:
303  return AArch64::STRHHui;
304  case AArch64::STRHHui:
305  return AArch64::STRWui;
306  case AArch64::STURBBi:
307  return AArch64::STURHHi;
308  case AArch64::STURHHi:
309  return AArch64::STURWi;
310  case AArch64::STURWi:
311  return AArch64::STURXi;
312  case AArch64::STRWui:
313  return AArch64::STRXui;
314  }
315 }
316 
317 static unsigned getMatchingPairOpcode(unsigned Opc) {
318  switch (Opc) {
319  default:
320  llvm_unreachable("Opcode has no pairwise equivalent!");
321  case AArch64::STRSui:
322  case AArch64::STURSi:
323  return AArch64::STPSi;
324  case AArch64::STRSpre:
325  return AArch64::STPSpre;
326  case AArch64::STRDui:
327  case AArch64::STURDi:
328  return AArch64::STPDi;
329  case AArch64::STRDpre:
330  return AArch64::STPDpre;
331  case AArch64::STRQui:
332  case AArch64::STURQi:
333  return AArch64::STPQi;
334  case AArch64::STRQpre:
335  return AArch64::STPQpre;
336  case AArch64::STRWui:
337  case AArch64::STURWi:
338  return AArch64::STPWi;
339  case AArch64::STRWpre:
340  return AArch64::STPWpre;
341  case AArch64::STRXui:
342  case AArch64::STURXi:
343  return AArch64::STPXi;
344  case AArch64::STRXpre:
345  return AArch64::STPXpre;
346  case AArch64::LDRSui:
347  case AArch64::LDURSi:
348  return AArch64::LDPSi;
349  case AArch64::LDRSpre:
350  return AArch64::LDPSpre;
351  case AArch64::LDRDui:
352  case AArch64::LDURDi:
353  return AArch64::LDPDi;
354  case AArch64::LDRDpre:
355  return AArch64::LDPDpre;
356  case AArch64::LDRQui:
357  case AArch64::LDURQi:
358  return AArch64::LDPQi;
359  case AArch64::LDRQpre:
360  return AArch64::LDPQpre;
361  case AArch64::LDRWui:
362  case AArch64::LDURWi:
363  return AArch64::LDPWi;
364  case AArch64::LDRWpre:
365  return AArch64::LDPWpre;
366  case AArch64::LDRXui:
367  case AArch64::LDURXi:
368  return AArch64::LDPXi;
369  case AArch64::LDRXpre:
370  return AArch64::LDPXpre;
371  case AArch64::LDRSWui:
372  case AArch64::LDURSWi:
373  return AArch64::LDPSWi;
374  }
375 }
376 
377 static bool isMatchingStore(MachineInstr &LoadInst,
378  MachineInstr &StoreInst) {
379  unsigned LdOpc = LoadInst.getOpcode();
380  unsigned StOpc = StoreInst.getOpcode();
381  switch (LdOpc) {
382  default:
383  llvm_unreachable("Unsupported load instruction!");
384  case AArch64::LDRBBui:
385  return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
386  StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
387  case AArch64::LDURBBi:
388  return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
389  StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
390  case AArch64::LDRHHui:
391  return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
392  StOpc == AArch64::STRXui;
393  case AArch64::LDURHHi:
394  return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
395  StOpc == AArch64::STURXi;
396  case AArch64::LDRWui:
397  return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
398  case AArch64::LDURWi:
399  return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
400  case AArch64::LDRXui:
401  return StOpc == AArch64::STRXui;
402  case AArch64::LDURXi:
403  return StOpc == AArch64::STURXi;
404  }
405 }
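// I.e. a load matches any store of equal or greater width with the same
// addressing form, e.g. an LDRHHui can be satisfied by an STRWui or STRXui
// that covers the loaded halfword.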
406 
407 static unsigned getPreIndexedOpcode(unsigned Opc) {
408  // FIXME: We don't currently support creating pre-indexed loads/stores when
409  // the load or store is the unscaled version. If we decide to perform such an
410  // optimization in the future the cases for the unscaled loads/stores will
411  // need to be added here.
412  switch (Opc) {
413  default:
414  llvm_unreachable("Opcode has no pre-indexed equivalent!");
415  case AArch64::STRSui:
416  return AArch64::STRSpre;
417  case AArch64::STRDui:
418  return AArch64::STRDpre;
419  case AArch64::STRQui:
420  return AArch64::STRQpre;
421  case AArch64::STRBBui:
422  return AArch64::STRBBpre;
423  case AArch64::STRHHui:
424  return AArch64::STRHHpre;
425  case AArch64::STRWui:
426  return AArch64::STRWpre;
427  case AArch64::STRXui:
428  return AArch64::STRXpre;
429  case AArch64::LDRSui:
430  return AArch64::LDRSpre;
431  case AArch64::LDRDui:
432  return AArch64::LDRDpre;
433  case AArch64::LDRQui:
434  return AArch64::LDRQpre;
435  case AArch64::LDRBBui:
436  return AArch64::LDRBBpre;
437  case AArch64::LDRHHui:
438  return AArch64::LDRHHpre;
439  case AArch64::LDRWui:
440  return AArch64::LDRWpre;
441  case AArch64::LDRXui:
442  return AArch64::LDRXpre;
443  case AArch64::LDRSWui:
444  return AArch64::LDRSWpre;
445  case AArch64::LDPSi:
446  return AArch64::LDPSpre;
447  case AArch64::LDPSWi:
448  return AArch64::LDPSWpre;
449  case AArch64::LDPDi:
450  return AArch64::LDPDpre;
451  case AArch64::LDPQi:
452  return AArch64::LDPQpre;
453  case AArch64::LDPWi:
454  return AArch64::LDPWpre;
455  case AArch64::LDPXi:
456  return AArch64::LDPXpre;
457  case AArch64::STPSi:
458  return AArch64::STPSpre;
459  case AArch64::STPDi:
460  return AArch64::STPDpre;
461  case AArch64::STPQi:
462  return AArch64::STPQpre;
463  case AArch64::STPWi:
464  return AArch64::STPWpre;
465  case AArch64::STPXi:
466  return AArch64::STPXpre;
467  case AArch64::STGOffset:
468  return AArch64::STGPreIndex;
469  case AArch64::STZGOffset:
470  return AArch64::STZGPreIndex;
471  case AArch64::ST2GOffset:
472  return AArch64::ST2GPreIndex;
473  case AArch64::STZ2GOffset:
474  return AArch64::STZ2GPreIndex;
475  case AArch64::STGPi:
476  return AArch64::STGPpre;
477  }
478 }
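// For example, STRXui "str x0, [x1, #16]" maps to STRXpre "str x0, [x1, #16]!",
// which stores to x1+16 and also writes the incremented address back to x1.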
479 
480 static unsigned getPostIndexedOpcode(unsigned Opc) {
481  switch (Opc) {
482  default:
483  llvm_unreachable("Opcode has no post-indexed equivalent!");
484  case AArch64::STRSui:
485  case AArch64::STURSi:
486  return AArch64::STRSpost;
487  case AArch64::STRDui:
488  case AArch64::STURDi:
489  return AArch64::STRDpost;
490  case AArch64::STRQui:
491  case AArch64::STURQi:
492  return AArch64::STRQpost;
493  case AArch64::STRBBui:
494  return AArch64::STRBBpost;
495  case AArch64::STRHHui:
496  return AArch64::STRHHpost;
497  case AArch64::STRWui:
498  case AArch64::STURWi:
499  return AArch64::STRWpost;
500  case AArch64::STRXui:
501  case AArch64::STURXi:
502  return AArch64::STRXpost;
503  case AArch64::LDRSui:
504  case AArch64::LDURSi:
505  return AArch64::LDRSpost;
506  case AArch64::LDRDui:
507  case AArch64::LDURDi:
508  return AArch64::LDRDpost;
509  case AArch64::LDRQui:
510  case AArch64::LDURQi:
511  return AArch64::LDRQpost;
512  case AArch64::LDRBBui:
513  return AArch64::LDRBBpost;
514  case AArch64::LDRHHui:
515  return AArch64::LDRHHpost;
516  case AArch64::LDRWui:
517  case AArch64::LDURWi:
518  return AArch64::LDRWpost;
519  case AArch64::LDRXui:
520  case AArch64::LDURXi:
521  return AArch64::LDRXpost;
522  case AArch64::LDRSWui:
523  return AArch64::LDRSWpost;
524  case AArch64::LDPSi:
525  return AArch64::LDPSpost;
526  case AArch64::LDPSWi:
527  return AArch64::LDPSWpost;
528  case AArch64::LDPDi:
529  return AArch64::LDPDpost;
530  case AArch64::LDPQi:
531  return AArch64::LDPQpost;
532  case AArch64::LDPWi:
533  return AArch64::LDPWpost;
534  case AArch64::LDPXi:
535  return AArch64::LDPXpost;
536  case AArch64::STPSi:
537  return AArch64::STPSpost;
538  case AArch64::STPDi:
539  return AArch64::STPDpost;
540  case AArch64::STPQi:
541  return AArch64::STPQpost;
542  case AArch64::STPWi:
543  return AArch64::STPWpost;
544  case AArch64::STPXi:
545  return AArch64::STPXpost;
546  case AArch64::STGOffset:
547  return AArch64::STGPostIndex;
548  case AArch64::STZGOffset:
549  return AArch64::STZGPostIndex;
550  case AArch64::ST2GOffset:
551  return AArch64::ST2GPostIndex;
552  case AArch64::STZ2GOffset:
553  return AArch64::STZ2GPostIndex;
554  case AArch64::STGPi:
555  return AArch64::STGPpost;
556  }
557 }
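// For example, LDRXui maps to LDRXpost "ldr x0, [x1], #16": the load uses the
// old value of x1 and the offset is added to x1 afterwards.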
558 
559 static bool isPairedLdSt(const MachineInstr &MI) {
560  switch (MI.getOpcode()) {
561  default:
562  return false;
563  case AArch64::LDPSi:
564  case AArch64::LDPSWi:
565  case AArch64::LDPDi:
566  case AArch64::LDPQi:
567  case AArch64::LDPWi:
568  case AArch64::LDPXi:
569  case AArch64::STPSi:
570  case AArch64::STPDi:
571  case AArch64::STPQi:
572  case AArch64::STPWi:
573  case AArch64::STPXi:
574  case AArch64::STGPi:
575  return true;
576  }
577 }
578 
579 static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {
580
581  unsigned OpcA = FirstMI.getOpcode();
582  unsigned OpcB = MI.getOpcode();
583 
584  switch (OpcA) {
585  default:
586  return false;
587  case AArch64::STRSpre:
588  return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
589  case AArch64::STRDpre:
590  return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
591  case AArch64::STRQpre:
592  return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
593  case AArch64::STRWpre:
594  return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
595  case AArch64::STRXpre:
596  return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
597  case AArch64::LDRSpre:
598  return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
599  case AArch64::LDRDpre:
600  return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
601  case AArch64::LDRQpre:
602  return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
603  case AArch64::LDRWpre:
604  return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
605  case AArch64::LDRXpre:
606  return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
607  }
608 }
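// E.g. "str x0, [x1, #-16]!" (STRXpre) followed by "str x2, [x1, #8]" (STRXui)
// may merge into "stp x0, x2, [x1, #-16]!" (STPXpre), subject to the offset
// checks in findMatchingInsn below.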
609 
610 // Returns the scale and offset range of pre/post indexed variants of MI.
611 static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
612  int &MinOffset, int &MaxOffset) {
613  bool IsPaired = isPairedLdSt(MI);
614  bool IsTagStore = isTagStore(MI);
615  // ST*G and all paired ldst have the same scale in pre/post-indexed variants
616  // as in the "unsigned offset" variant.
617  // All other pre/post indexed ldst instructions are unscaled.
618  Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;
619 
620  if (IsPaired) {
621  MinOffset = -64;
622  MaxOffset = 63;
623  } else {
624  MinOffset = -256;
625  MaxOffset = 255;
626  }
627 }
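// Worked example: for LDPXi getMemScale returns 8, so Scale = 8 and the
// encodable range [-64, 63] corresponds to byte offsets [-512, 504]; an
// unscaled single ldr/str instead gets the plain byte range [-256, 255].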
628 
629 static MachineOperand &getLdStRegOp(MachineInstr &MI,
630  unsigned PairedRegOp = 0) {
631  assert(PairedRegOp < 2 && "Unexpected register operand idx.");
632  bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
633  if (IsPreLdSt)
634  PairedRegOp += 1;
635  unsigned Idx = isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
636  return MI.getOperand(Idx);
637 }
638 
639 static const MachineOperand &getLdStBaseOp(const MachineInstr &MI) {
640  unsigned Idx = isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2 : 1;
641  return MI.getOperand(Idx);
642 }
643 
644 static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI) {
645  unsigned Idx = isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3 : 2;
646  return MI.getOperand(Idx);
647 }
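// Operand layout assumed by the helpers above: "ldr x0, [x1, #8]" is
// (Rt, Rn, imm); "ldp x0, x1, [x2, #16]" is (Rt, Rt2, Rn, imm); pre/post
// indexed forms prepend the base writeback def, e.g. STRXpre is
// (Rn_wb, Rt, Rn, imm).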
648 
649 static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
650  MachineInstr &StoreInst,
651  const AArch64InstrInfo *TII) {
652  assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
653  int LoadSize = TII->getMemScale(LoadInst);
654  int StoreSize = TII->getMemScale(StoreInst);
655  int UnscaledStOffset = TII->hasUnscaledLdStOffset(StoreInst)
656  ? getLdStOffsetOp(StoreInst).getImm()
657  : getLdStOffsetOp(StoreInst).getImm() * StoreSize;
658  int UnscaledLdOffset = TII->hasUnscaledLdStOffset(LoadInst)
659  ? getLdStOffsetOp(LoadInst).getImm()
660  : getLdStOffsetOp(LoadInst).getImm() * LoadSize;
661  return (UnscaledStOffset <= UnscaledLdOffset) &&
662  (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
663 }
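// E.g. "str x0, [sp, #8]" covers bytes [8, 16), so "ldr w1, [sp, #12]"
// (bytes [12, 16)) is fully in range and its value can be extracted from x0.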
664 
665 static bool isPromotableZeroStoreInst(MachineInstr &MI) {
666  unsigned Opc = MI.getOpcode();
667  return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
668  isNarrowStore(Opc)) &&
669  getLdStRegOp(MI).getReg() == AArch64::WZR;
670 }
671 
672 static bool isPromotableLoadFromStore(MachineInstr &MI) {
673  switch (MI.getOpcode()) {
674  default:
675  return false;
676  // Scaled instructions.
677  case AArch64::LDRBBui:
678  case AArch64::LDRHHui:
679  case AArch64::LDRWui:
680  case AArch64::LDRXui:
681  // Unscaled instructions.
682  case AArch64::LDURBBi:
683  case AArch64::LDURHHi:
684  case AArch64::LDURWi:
685  case AArch64::LDURXi:
686  return true;
687  }
688 }
689 
690 static bool isMergeableLdStUpdate(MachineInstr &MI) {
691  unsigned Opc = MI.getOpcode();
692  switch (Opc) {
693  default:
694  return false;
695  // Scaled instructions.
696  case AArch64::STRSui:
697  case AArch64::STRDui:
698  case AArch64::STRQui:
699  case AArch64::STRXui:
700  case AArch64::STRWui:
701  case AArch64::STRHHui:
702  case AArch64::STRBBui:
703  case AArch64::LDRSui:
704  case AArch64::LDRDui:
705  case AArch64::LDRQui:
706  case AArch64::LDRXui:
707  case AArch64::LDRWui:
708  case AArch64::LDRHHui:
709  case AArch64::LDRBBui:
710  case AArch64::STGOffset:
711  case AArch64::STZGOffset:
712  case AArch64::ST2GOffset:
713  case AArch64::STZ2GOffset:
714  case AArch64::STGPi:
715  // Unscaled instructions.
716  case AArch64::STURSi:
717  case AArch64::STURDi:
718  case AArch64::STURQi:
719  case AArch64::STURWi:
720  case AArch64::STURXi:
721  case AArch64::LDURSi:
722  case AArch64::LDURDi:
723  case AArch64::LDURQi:
724  case AArch64::LDURWi:
725  case AArch64::LDURXi:
726  // Paired instructions.
727  case AArch64::LDPSi:
728  case AArch64::LDPSWi:
729  case AArch64::LDPDi:
730  case AArch64::LDPQi:
731  case AArch64::LDPWi:
732  case AArch64::LDPXi:
733  case AArch64::STPSi:
734  case AArch64::STPDi:
735  case AArch64::STPQi:
736  case AArch64::STPWi:
737  case AArch64::STPXi:
738  // Make sure this is a reg+imm (as opposed to an address reloc).
739  if (!getLdStOffsetOp(MI).isImm())
740  return false;
741 
742  return true;
743  }
744 }
745 
746 MachineBasicBlock::iterator
747 AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
748  MachineBasicBlock::iterator MergeMI,
749  const LdStPairFlags &Flags) {
750  assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
751  "Expected promotable zero stores.");
752 
753  MachineBasicBlock::iterator E = I->getParent()->end();
754  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
755  // If NextI is the second of the two instructions to be merged, we need
756  // to skip one further. Either way, the merge will invalidate the iterator,
757  // and we don't need to scan the new instruction, as it's a pairwise
758  // instruction, which we're not considering for further action anyway.
759  if (NextI == MergeMI)
760  NextI = next_nodbg(NextI, E);
761 
762  unsigned Opc = I->getOpcode();
763  bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
764  int OffsetStride = IsScaled ? 1 : TII->getMemScale(*I);
765 
766  bool MergeForward = Flags.getMergeForward();
767  // Insert our new paired instruction after whichever of the paired
768  // instructions MergeForward indicates.
769  MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
770  // MergeForward also determines which instruction we copy the base register
771  // operand from, so the flags stay compatible with the input code.
772  const MachineOperand &BaseRegOp =
773  MergeForward ? getLdStBaseOp(*MergeMI) : getLdStBaseOp(*I);
774 
775  // Which register is Rt and which is Rt2 depends on the offset order.
776  MachineInstr *RtMI;
777  if (getLdStOffsetOp(*I).getImm() ==
778  getLdStOffsetOp(*MergeMI).getImm() + OffsetStride)
779  RtMI = &*MergeMI;
780  else
781  RtMI = &*I;
782 
783  int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
784  // Change the scaled offset from small to large type.
785  if (IsScaled) {
786  assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
787  OffsetImm /= 2;
788  }
789 
790  // Construct the new instruction.
791  DebugLoc DL = I->getDebugLoc();
792  MachineBasicBlock *MBB = I->getParent();
793  MachineInstrBuilder MIB;
794  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
795  .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
796  .add(BaseRegOp)
797  .addImm(OffsetImm)
798  .cloneMergedMemRefs({&*I, &*MergeMI})
799  .setMIFlags(I->mergeFlagsWith(*MergeMI));
800  (void)MIB;
801 
802  LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n ");
803  LLVM_DEBUG(I->print(dbgs()));
804  LLVM_DEBUG(dbgs() << " ");
805  LLVM_DEBUG(MergeMI->print(dbgs()));
806  LLVM_DEBUG(dbgs() << " with instruction:\n ");
807  LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
808  LLVM_DEBUG(dbgs() << "\n");
809 
810  // Erase the old instructions.
811  I->eraseFromParent();
812  MergeMI->eraseFromParent();
813  return NextI;
814 }
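// E.g. "strh wzr, [x0, #4]" and "strh wzr, [x0, #6]" become
// "str wzr, [x0, #4]": the lower scaled immediate (2) is halved to 1 when
// switching to the word-sized opcode.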
815 
816 // Apply Fn to all instructions between MI and the beginning of the block, until
817 // a def for DefReg is reached. Returns true iff Fn returns true for all
818 // visited instructions. Stop after visiting Limit iterations.
819 static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
820  const TargetRegisterInfo *TRI, unsigned Limit,
821  std::function<bool(MachineInstr &, bool)> &Fn) {
822  auto MBB = MI.getParent();
823  for (MachineInstr &I :
824  instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
825  if (!Limit)
826  return false;
827  --Limit;
828 
829  bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
830  return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
831  TRI->regsOverlap(MOP.getReg(), DefReg);
832  });
833  if (!Fn(I, isDef))
834  return false;
835  if (isDef)
836  break;
837  }
838  return true;
839 }
840 
841 static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
842  const TargetRegisterInfo *TRI) {
843 
844  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
845  if (MOP.isReg() && MOP.isKill())
846  Units.removeReg(MOP.getReg());
847 
848  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
849  if (MOP.isReg() && !MOP.isKill())
850  Units.addReg(MOP.getReg());
851 }
852 
853 MachineBasicBlock::iterator
854 AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
855  MachineBasicBlock::iterator Paired,
856  const LdStPairFlags &Flags) {
857  MachineBasicBlock::iterator E = I->getParent()->end();
858  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
859  // If NextI is the second of the two instructions to be merged, we need
860  // to skip one further. Either way, the merge will invalidate the iterator,
861  // and we don't need to scan the new instruction, as it's a pairwise
862  // instruction, which we're not considering for further action anyway.
863  if (NextI == Paired)
864  NextI = next_nodbg(NextI, E);
865 
866  int SExtIdx = Flags.getSExtIdx();
867  unsigned Opc =
868  SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
869  bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
870  int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;
871 
872  bool MergeForward = Flags.getMergeForward();
873 
874  Optional<MCPhysReg> RenameReg = Flags.getRenameReg();
875  if (MergeForward && RenameReg) {
876  MCRegister RegToRename = getLdStRegOp(*I).getReg();
877  DefinedInBB.addReg(*RenameReg);
878 
879  // Return the sub/super register for RenameReg, matching the size of
880  // OriginalReg.
881  auto GetMatchingSubReg = [this,
882  RenameReg](MCPhysReg OriginalReg) -> MCPhysReg {
883  for (MCPhysReg SubOrSuper : TRI->sub_and_superregs_inclusive(*RenameReg))
884  if (TRI->getMinimalPhysRegClass(OriginalReg) ==
885  TRI->getMinimalPhysRegClass(SubOrSuper))
886  return SubOrSuper;
887  llvm_unreachable("Should have found matching sub or super register!");
888  };
889 
890  std::function<bool(MachineInstr &, bool)> UpdateMIs =
891  [this, RegToRename, GetMatchingSubReg](MachineInstr &MI, bool IsDef) {
892  if (IsDef) {
893  bool SeenDef = false;
894  for (auto &MOP : MI.operands()) {
895  // Rename the first explicit definition and all implicit
896  // definitions matching RegToRename.
897  if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
898  (!SeenDef || (MOP.isDef() && MOP.isImplicit())) &&
899  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
900  assert((MOP.isImplicit() ||
901  (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
902  "Need renamable operands");
903  MOP.setReg(GetMatchingSubReg(MOP.getReg()));
904  SeenDef = true;
905  }
906  }
907  } else {
908  for (auto &MOP : MI.operands()) {
909  if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
910  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
911  assert((MOP.isImplicit() ||
912  (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
913  "Need renamable operands");
914  MOP.setReg(GetMatchingSubReg(MOP.getReg()));
915  }
916  }
917  }
918  LLVM_DEBUG(dbgs() << "Renamed " << MI << "\n");
919  return true;
920  };
921  forAllMIsUntilDef(*I, RegToRename, TRI, LdStLimit, UpdateMIs);
922 
923 #if !defined(NDEBUG)
924  // Make sure the register used for renaming is not used between the paired
925  // instructions. That would trash the content before the new paired
926  // instruction.
927  for (auto &MI :
928  iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>(
929  std::next(I), std::next(Paired)))
930  assert(all_of(MI.operands(),
931  [this, &RenameReg](const MachineOperand &MOP) {
932  return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
933  MOP.isUndef() ||
934  !TRI->regsOverlap(MOP.getReg(), *RenameReg);
935  }) &&
936  "Rename register used between paired instruction, trashing the "
937  "content");
938 #endif
939  }
940 
941  // Insert our new paired instruction after whichever of the paired
942  // instructions MergeForward indicates.
943  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
944  // MergeForward also determines which instruction we copy the base register
945  // operand from, so the flags stay compatible with the input code.
946  const MachineOperand &BaseRegOp =
947  MergeForward ? getLdStBaseOp(*Paired) : getLdStBaseOp(*I);
948 
949  int Offset = getLdStOffsetOp(*I).getImm();
950  int PairedOffset = getLdStOffsetOp(*Paired).getImm();
951  bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
952  if (IsUnscaled != PairedIsUnscaled) {
953  // We're trying to pair instructions that differ in how they are scaled. If
954  // I is scaled then scale the offset of Paired accordingly. Otherwise, do
955  // the opposite (i.e., make Paired's offset unscaled).
956  int MemSize = TII->getMemScale(*Paired);
957  if (PairedIsUnscaled) {
958  // If the unscaled offset isn't a multiple of the MemSize, we can't
959  // pair the operations together.
960  assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
961  "Offset should be a multiple of the stride!");
962  PairedOffset /= MemSize;
963  } else {
964  PairedOffset *= MemSize;
965  }
966  }
967 
968  // Which register is Rt and which is Rt2 depends on the offset order.
969  // However, for pre load/stores the Rt should be the one of the pre
970  // load/store.
971  MachineInstr *RtMI, *Rt2MI;
972  if (Offset == PairedOffset + OffsetStride &&
973  !AArch64InstrInfo::isPreLdSt(*I)) {
974  RtMI = &*Paired;
975  Rt2MI = &*I;
976  // Here we swapped the assumption made for SExtIdx.
977  // I.e., we turn ldp I, Paired into ldp Paired, I.
978  // Update the index accordingly.
979  if (SExtIdx != -1)
980  SExtIdx = (SExtIdx + 1) % 2;
981  } else {
982  RtMI = &*I;
983  Rt2MI = &*Paired;
984  }
985  int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
986  // Scale the immediate offset, if necessary.
987  if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
988  assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
989  "Unscaled offset cannot be scaled.");
990  OffsetImm /= TII->getMemScale(*RtMI);
991  }
992 
993  // Construct the new instruction.
994  MachineInstrBuilder MIB;
995  DebugLoc DL = I->getDebugLoc();
996  MachineBasicBlock *MBB = I->getParent();
997  MachineOperand RegOp0 = getLdStRegOp(*RtMI);
998  MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
999  // Kill flags may become invalid when moving stores for pairing.
1000  if (RegOp0.isUse()) {
1001  if (!MergeForward) {
1002  // Clear kill flags on store if moving upwards. Example:
1003  // STRWui %w0, ...
1004  // USE %w1
1005  // STRWui kill %w1 ; need to clear kill flag when moving STRWui upwards
1006  RegOp0.setIsKill(false);
1007  RegOp1.setIsKill(false);
1008  } else {
1009  // Clear kill flags of the first store's register. Example:
1010  // STRWui %w1, ...
1011  // USE kill %w1 ; need to clear kill flag when moving STRWui downwards
1012  // STRW %w0
1013  Register Reg = getLdStRegOp(*I).getReg();
1014  for (MachineInstr &MI : make_range(std::next(I), Paired))
1015  MI.clearRegisterKills(Reg, TRI);
1016  }
1017  }
1018 
1019  unsigned int MatchPairOpcode = getMatchingPairOpcode(Opc);
1020  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(MatchPairOpcode));
1021 
1022  // Adds the pre-index operand for pre-indexed ld/st pairs.
1023  if (AArch64InstrInfo::isPreLdSt(*RtMI))
1024  MIB.addReg(BaseRegOp.getReg(), RegState::Define);
1025 
1026  MIB.add(RegOp0)
1027  .add(RegOp1)
1028  .add(BaseRegOp)
1029  .addImm(OffsetImm)
1030  .cloneMergedMemRefs({&*I, &*Paired})
1031  .setMIFlags(I->mergeFlagsWith(*Paired));
1032 
1033  (void)MIB;
1034 
1035  LLVM_DEBUG(
1036  dbgs() << "Creating pair load/store. Replacing instructions:\n ");
1037  LLVM_DEBUG(I->print(dbgs()));
1038  LLVM_DEBUG(dbgs() << " ");
1039  LLVM_DEBUG(Paired->print(dbgs()));
1040  LLVM_DEBUG(dbgs() << " with instruction:\n ");
1041  if (SExtIdx != -1) {
1042  // Generate the sign extension for the proper result of the ldp.
1043  // I.e., with X1, that would be:
1044  // %w1 = KILL %w1, implicit-def %x1
1045  // %x1 = SBFMXri killed %x1, 0, 31
1046  MachineOperand &DstMO = MIB->getOperand(SExtIdx);
1047  // Right now, DstMO has the extended register, since it comes from an
1048  // extended opcode.
1049  Register DstRegX = DstMO.getReg();
1050  // Get the W variant of that register.
1051  Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
1052  // Update the result of LDP to use the W instead of the X variant.
1053  DstMO.setReg(DstRegW);
1054  LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1055  LLVM_DEBUG(dbgs() << "\n");
1056  // Make the machine verifier happy by providing a definition for
1057  // the X register.
1058  // Insert this definition right after the generated LDP, i.e., before
1059  // InsertionPoint.
1060  MachineInstrBuilder MIBKill =
1061  BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
1062  .addReg(DstRegW)
1063  .addReg(DstRegX, RegState::Define);
1064  MIBKill->getOperand(2).setImplicit();
1065  // Create the sign extension.
1066  MachineInstrBuilder MIBSXTW =
1067  BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
1068  .addReg(DstRegX)
1069  .addImm(0)
1070  .addImm(31);
1071  (void)MIBSXTW;
1072  LLVM_DEBUG(dbgs() << " Extend operand:\n ");
1073  LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
1074  } else {
1075  LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1076  }
1077  LLVM_DEBUG(dbgs() << "\n");
1078 
1079  if (MergeForward)
1080  for (const MachineOperand &MOP : phys_regs_and_masks(*I))
1081  if (MOP.isReg() && MOP.isKill())
1082  DefinedInBB.addReg(MOP.getReg());
1083 
1084  // Erase the old instructions.
1085  I->eraseFromParent();
1086  Paired->eraseFromParent();
1087 
1088  return NextI;
1089 }
1090 
1091 MachineBasicBlock::iterator
1092 AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
1093  MachineBasicBlock::iterator StoreI) {
1094  MachineBasicBlock::iterator NextI =
1095  next_nodbg(LoadI, LoadI->getParent()->end());
1096 
1097  int LoadSize = TII->getMemScale(*LoadI);
1098  int StoreSize = TII->getMemScale(*StoreI);
1099  Register LdRt = getLdStRegOp(*LoadI).getReg();
1100  const MachineOperand &StMO = getLdStRegOp(*StoreI);
1101  Register StRt = getLdStRegOp(*StoreI).getReg();
1102  bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
1103 
1104  assert((IsStoreXReg ||
1105  TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
1106  "Unexpected RegClass");
1107 
1108  MachineInstr *BitExtMI;
1109  if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
1110  // Remove the load if the destination register of the load is the same
1111  // register as the one holding the stored value.
1112  if (StRt == LdRt && LoadSize == 8) {
1113  for (MachineInstr &MI : make_range(StoreI->getIterator(),
1114  LoadI->getIterator())) {
1115  if (MI.killsRegister(StRt, TRI)) {
1116  MI.clearRegisterKills(StRt, TRI);
1117  break;
1118  }
1119  }
1120  LLVM_DEBUG(dbgs() << "Remove load instruction:\n ");
1121  LLVM_DEBUG(LoadI->print(dbgs()));
1122  LLVM_DEBUG(dbgs() << "\n");
1123  LoadI->eraseFromParent();
1124  return NextI;
1125  }
1126  // Replace the load with a mov if the load and store have the same size.
1127  BitExtMI =
1128  BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1129  TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
1130  .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
1131  .add(StMO)
1132  .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
1133  .setMIFlags(LoadI->getFlags());
1134  } else {
1135  // FIXME: Currently we disable this transformation in big-endian targets as
1136  // performance and correctness are verified only in little-endian.
1137  if (!Subtarget->isLittleEndian())
1138  return NextI;
1139  bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
1140  assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
1141  "Unsupported ld/st match");
1142  assert(LoadSize <= StoreSize && "Invalid load size");
1143  int UnscaledLdOffset = IsUnscaled
1144  ? getLdStOffsetOp(*LoadI).getImm()
1145  : getLdStOffsetOp(*LoadI).getImm() * LoadSize;
1146  int UnscaledStOffset = IsUnscaled
1147  ? getLdStOffsetOp(*StoreI).getImm()
1148  : getLdStOffsetOp(*StoreI).getImm() * StoreSize;
1149  int Width = LoadSize * 8;
1150  Register DestReg =
1151  IsStoreXReg ? Register(TRI->getMatchingSuperReg(
1152  LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
1153  : LdRt;
1154 
1155  assert((UnscaledLdOffset >= UnscaledStOffset &&
1156  (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
1157  "Invalid offset");
1158 
1159  int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
1160  int Imms = Immr + Width - 1;
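 // Worked example (assumed values): an 8-byte store at offset 0 followed
 // by a 4-byte load at offset 4 gives Immr = 32 and Imms = 63, i.e. the
 // UBFMXri below extracts bits [63:32] of the stored value.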
1161  if (UnscaledLdOffset == UnscaledStOffset) {
1162  uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
1163  | ((Immr) << 6) // immr
1164  | ((Imms) << 0) // imms
1165  ;
1166 
1167  BitExtMI =
1168  BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1169  TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
1170  DestReg)
1171  .add(StMO)
1172  .addImm(AndMaskEncoded)
1173  .setMIFlags(LoadI->getFlags());
1174  } else {
1175  BitExtMI =
1176  BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1177  TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
1178  DestReg)
1179  .add(StMO)
1180  .addImm(Immr)
1181  .addImm(Imms)
1182  .setMIFlags(LoadI->getFlags());
1183  }
1184  }
1185 
1186  // Clear kill flags between store and load.
1187  for (MachineInstr &MI : make_range(StoreI->getIterator(),
1188  BitExtMI->getIterator()))
1189  if (MI.killsRegister(StRt, TRI)) {
1190  MI.clearRegisterKills(StRt, TRI);
1191  break;
1192  }
1193 
1194  LLVM_DEBUG(dbgs() << "Promoting load by replacing :\n ");
1195  LLVM_DEBUG(StoreI->print(dbgs()));
1196  LLVM_DEBUG(dbgs() << " ");
1197  LLVM_DEBUG(LoadI->print(dbgs()));
1198  LLVM_DEBUG(dbgs() << " with instructions:\n ");
1199  LLVM_DEBUG(StoreI->print(dbgs()));
1200  LLVM_DEBUG(dbgs() << " ");
1201  LLVM_DEBUG((BitExtMI)->print(dbgs()));
1202  LLVM_DEBUG(dbgs() << "\n");
1203 
1204  // Erase the old instructions.
1205  LoadI->eraseFromParent();
1206  return NextI;
1207 }
1208 
1209 static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
1210  // Convert the byte-offset used by unscaled into an "element" offset used
1211  // by the scaled pair load/store instructions.
1212  if (IsUnscaled) {
1213  // If the byte-offset isn't a multiple of the stride, there's no point
1214  // trying to match it.
1215  if (Offset % OffsetStride)
1216  return false;
1217  Offset /= OffsetStride;
1218  }
1219  return Offset <= 63 && Offset >= -64;
1220 }
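// E.g. an unscaled "stur x0, [x1, #24]" with OffsetStride 8 becomes element
// offset 3, which fits the signed 7-bit [-64, 63] immediate of LDP/STP.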
1221 
1222 // Do alignment, specialized to power of 2 and for signed ints,
1223 // avoiding having to do a C-style cast from uint64_t to int when
1224 // using alignTo from include/llvm/Support/MathExtras.h.
1225 // FIXME: Move this function to include/MathExtras.h?
1226 static int alignTo(int Num, int PowOf2) {
1227  return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
1228 }
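// E.g. alignTo(5, 4) == 8 and alignTo(-3, 4) == 0.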
1229 
1230 static bool mayAlias(MachineInstr &MIa,
1231  SmallVectorImpl<MachineInstr *> &MemInsns,
1232  AliasAnalysis *AA) {
1233  for (MachineInstr *MIb : MemInsns)
1234  if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false))
1235  return true;
1236 
1237  return false;
1238 }
1239 
1240 bool AArch64LoadStoreOpt::findMatchingStore(
1241  MachineBasicBlock::iterator I, unsigned Limit,
1242  MachineBasicBlock::iterator &StoreI) {
1243  MachineBasicBlock::iterator B = I->getParent()->begin();
1244  MachineBasicBlock::iterator MBBI = I;
1245  MachineInstr &LoadMI = *I;
1246  Register BaseReg = getLdStBaseOp(LoadMI).getReg();
1247 
1248  // If the load is the first instruction in the block, there's obviously
1249  // not any matching store.
1250  if (MBBI == B)
1251  return false;
1252 
1253  // Track which register units have been modified and used between the first
1254  // insn and the second insn.
1255  ModifiedRegUnits.clear();
1256  UsedRegUnits.clear();
1257 
1258  unsigned Count = 0;
1259  do {
1260  MBBI = prev_nodbg(MBBI, B);
1261  MachineInstr &MI = *MBBI;
1262 
1263  // Don't count transient instructions towards the search limit since there
1264  // may be different numbers of them if e.g. debug information is present.
1265  if (!MI.isTransient())
1266  ++Count;
1267 
1268  // If the load instruction reads directly from the address to which the
1269  // store instruction writes and the stored value is not modified, we can
1270  // promote the load. Since we do not handle stores with pre-/post-index,
1271  // it's unnecessary to check if BaseReg is modified by the store itself.
1272  // Also we can't handle stores without an immediate offset operand,
1273  // while the operand might be the address for a global variable.
1274  if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
1275  BaseReg == getLdStBaseOp(MI).getReg() && getLdStOffsetOp(MI).isImm() &&
1276  isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
1277  ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
1278  StoreI = MBBI;
1279  return true;
1280  }
1281 
1282  if (MI.isCall())
1283  return false;
1284 
1285  // Update modified / uses register units.
1286  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1287 
1288  // Otherwise, if the base register is modified, we have no match, so
1289  // return early.
1290  if (!ModifiedRegUnits.available(BaseReg))
1291  return false;
1292 
1293  // If we encounter a store aliased with the load, return early.
1294  if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA*/ false))
1295  return false;
1296  } while (MBBI != B && Count < Limit);
1297  return false;
1298 }
1299 
1300 // Returns true if FirstMI and MI are candidates for merging or pairing.
1301 // Otherwise, returns false.
1302 static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
1303  LdStPairFlags &Flags,
1304  const AArch64InstrInfo *TII) {
1305  // If this is volatile or if pairing is suppressed, not a candidate.
1306  if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
1307  return false;
1308 
1309  // We should have already checked FirstMI for pair suppression and volatility.
1310  assert(!FirstMI.hasOrderedMemoryRef() &&
1311  !TII->isLdStPairSuppressed(FirstMI) &&
1312  "FirstMI shouldn't get here if either of these checks are true.");
1313 
1314  unsigned OpcA = FirstMI.getOpcode();
1315  unsigned OpcB = MI.getOpcode();
1316 
1317  // Opcodes match: If the opcodes are pre ld/st there is nothing more to check.
1318  if (OpcA == OpcB)
1319  return !AArch64InstrInfo::isPreLdSt(FirstMI);
1320 
1321  // Try to match a sign-extended load/store with a zero-extended load/store.
1322  bool IsValidLdStrOpc, PairIsValidLdStrOpc;
1323  unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
1324  assert(IsValidLdStrOpc &&
1325  "Given Opc should be a Load or Store with an immediate");
1326  // OpcA will be the first instruction in the pair.
1327  if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
1328  Flags.setSExtIdx(NonSExtOpc == (unsigned)OpcA ? 1 : 0);
1329  return true;
1330  }
1331 
1332  // If the second instruction isn't even a mergable/pairable load/store, bail
1333  // out.
1334  if (!PairIsValidLdStrOpc)
1335  return false;
1336 
1337  // FIXME: We don't support merging narrow stores with mixed scaled/unscaled
1338  // offsets.
1339  if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
1340  return false;
1341 
1342  // The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and
1343  // LDR<S,D,Q,W,X>pre-LDR<S,D,Q,W,X>ui
1344  // are candidate pairs that can be merged.
1345  if (isPreLdStPairCandidate(FirstMI, MI))
1346  return true;
1347 
1348  // Try to match an unscaled load/store with a scaled load/store.
1349  return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) &&
1350  getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB);
1351
1352  // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
1353 }
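// E.g. LDRSWui and LDRWui share the non-sext opcode LDRWui, so they are
// pairing candidates; SExtIdx records which result of the resulting LDP must
// be sign-extended afterwards (see mergePairedInsns).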
1354 
1355 static bool
1356 canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
1357  SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
1358  const TargetRegisterInfo *TRI) {
1359  if (!FirstMI.mayStore())
1360  return false;
1361 
1362  // Check if we can find an unused register which we can use to rename
1363  // the register used by the first load/store.
1364  auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
1365  MachineFunction &MF = *FirstMI.getParent()->getParent();
1366  if (!RegClass || !MF.getRegInfo().tracksLiveness())
1367  return false;
1368 
1369  auto RegToRename = getLdStRegOp(FirstMI).getReg();
1370  // For now, we only rename if the store operand gets killed at the store.
1371  if (!getLdStRegOp(FirstMI).isKill() &&
1372  !any_of(FirstMI.operands(),
1373  [TRI, RegToRename](const MachineOperand &MOP) {
1374  return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1375  MOP.isImplicit() && MOP.isKill() &&
1376  TRI->regsOverlap(RegToRename, MOP.getReg());
1377  })) {
1378  LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI << "\n");
1379  return false;
1380  }
1381  auto canRenameMOP = [TRI](const MachineOperand &MOP) {
1382  if (MOP.isReg()) {
1383  auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
1384  // Renaming registers with multiple disjunct sub-registers (e.g. the
1385  // result of a LD3) means that all sub-registers are renamed, potentially
1386  // impacting other instructions we did not check. Bail out.
1387  // Note that this relies on the structure of the AArch64 register file. In
1388  // particular, a subregister cannot be written without overwriting the
1389  // whole register.
1390  if (RegClass->HasDisjunctSubRegs) {
1391  LLVM_DEBUG(
1392  dbgs()
1393  << " Cannot rename operands with multiple disjunct subregisters ("
1394  << MOP << ")\n");
1395  return false;
1396  }
1397  }
1398  return MOP.isImplicit() ||
1399  (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
1400  };
1401 
1402  bool FoundDef = false;
1403 
1404  // For each instruction between FirstMI and the previous def for RegToRename,
1405  // we
1406  // * check if we can rename RegToRename in this instruction
1407  // * collect the registers used and required register classes for RegToRename.
1408  std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
1409  bool IsDef) {
1410  LLVM_DEBUG(dbgs() << "Checking " << MI << "\n");
1411  // Currently we do not try to rename across frame-setup instructions.
1412  if (MI.getFlag(MachineInstr::FrameSetup)) {
1413  LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions currently ("
1414  << MI << ")\n");
1415  return false;
1416  }
1417 
1418  UsedInBetween.accumulate(MI);
1419 
1420  // For a definition, check that we can rename the definition and exit the
1421  // loop.
1422  FoundDef = IsDef;
1423 
1424  // For defs, check if we can rename the first def of RegToRename.
1425  if (FoundDef) {
1426  // For some pseudo instructions, we might not generate code in the end
1427  // (e.g. KILL) and we would end up without a correct def for the rename
1428  // register.
1429  // TODO: This might be overly conservative and we could handle those cases
1430  // in multiple ways:
1431  // 1. Insert an extra copy, to materialize the def.
1432  // 2. Skip pseudo-defs until we find a non-pseudo def.
1433  if (MI.isPseudo()) {
1434  LLVM_DEBUG(dbgs() << " Cannot rename pseudo instruction " << MI
1435  << "\n");
1436  return false;
1437  }
1438 
1439  for (auto &MOP : MI.operands()) {
1440  if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||
1441  !TRI->regsOverlap(MOP.getReg(), RegToRename))
1442  continue;
1443  if (!canRenameMOP(MOP)) {
1444  LLVM_DEBUG(dbgs()
1445  << " Cannot rename " << MOP << " in " << MI << "\n");
1446  return false;
1447  }
1448  RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1449  }
1450  return true;
1451  } else {
1452  for (auto &MOP : MI.operands()) {
1453  if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1454  !TRI->regsOverlap(MOP.getReg(), RegToRename))
1455  continue;
1456 
1457  if (!canRenameMOP(MOP)) {
1458  LLVM_DEBUG(dbgs()
1459  << " Cannot rename " << MOP << " in " << MI << "\n");
1460  return false;
1461  }
1462  RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1463  }
1464  }
1465  return true;
1466  };
1467 
1468  if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
1469  return false;
1470 
1471  if (!FoundDef) {
1472  LLVM_DEBUG(dbgs() << " Did not find definition for register in BB\n");
1473  return false;
1474  }
1475  return true;
1476 }
1477 
1478 // Check if we can find a physical register for renaming \p Reg. This register
1479 // must:
1480 // * not be defined already in \p DefinedInBB; DefinedInBB must contain all
1481 // defined registers up to the point where the renamed register will be used,
1482 // * not used in \p UsedInBetween; UsedInBetween must contain all accessed
1483 // registers in the range the rename register will be used,
1484 // * is available in all used register classes (checked using RequiredClasses).
1485 static Optional<MCPhysReg> tryToFindRegisterToRename(
1486  const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
1487  LiveRegUnits &UsedInBetween,
1488  SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
1489  const TargetRegisterInfo *TRI) {
1490  const MachineRegisterInfo &RegInfo = MF.getRegInfo();
1491 
1492  // Checks if any sub- or super-register of PR is callee saved.
1493  auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
1494  return any_of(TRI->sub_and_superregs_inclusive(PR),
1495  [&MF, TRI](MCPhysReg SubOrSuper) {
1496  return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
1497  });
1498  };
1499 
1500  // Check if PR or one of its sub- or super-registers can be used for all
1501  // required register classes.
1502  auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
1503  return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
1504  return any_of(TRI->sub_and_superregs_inclusive(PR),
1505  [C, TRI](MCPhysReg SubOrSuper) {
1506  return C == TRI->getMinimalPhysRegClass(SubOrSuper);
1507  });
1508  });
1509  };
1510 
1511  auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
1512  for (const MCPhysReg &PR : *RegClass) {
1513  if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
1514  !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
1515  CanBeUsedForAllClasses(PR)) {
1516  DefinedInBB.addReg(PR);
1517  LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
1518  << "\n");
1519  return {PR};
1520  }
1521  }
1522  LLVM_DEBUG(dbgs() << "No rename register found from "
1523  << TRI->getRegClassName(RegClass) << "\n");
1524  return None;
1525 }
1526 
1527 /// Scan the instructions looking for a load/store that can be combined with the
1528 /// current instruction into a wider equivalent or a load/store pair.
1529 MachineBasicBlock::iterator
1530 AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
1531  LdStPairFlags &Flags, unsigned Limit,
1532  bool FindNarrowMerge) {
1533  MachineBasicBlock::iterator E = I->getParent()->end();
1534  MachineBasicBlock::iterator MBBI = I;
1535  MachineBasicBlock::iterator MBBIWithRenameReg;
1536  MachineInstr &FirstMI = *I;
1537  MBBI = next_nodbg(MBBI, E);
1538 
1539  bool MayLoad = FirstMI.mayLoad();
1540  bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
1541  Register Reg = getLdStRegOp(FirstMI).getReg();
1542  Register BaseReg = getLdStBaseOp(FirstMI).getReg();
1543  int Offset = getLdStOffsetOp(FirstMI).getImm();
1544  int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
1545  bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
1546 
1547  Optional<bool> MaybeCanRename = None;
1548  if (!EnableRenaming)
1549  MaybeCanRename = {false};
1550 
1551  SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
1552  LiveRegUnits UsedInBetween;
1553  UsedInBetween.init(*TRI);
1554 
1555  Flags.clearRenameReg();
1556 
1557  // Track which register units have been modified and used between the first
1558  // insn (inclusive) and the second insn.
1559  ModifiedRegUnits.clear();
1560  UsedRegUnits.clear();
1561 
1562  // Remember any instructions that read/write memory between FirstMI and MI.
1563  SmallVector<MachineInstr *, 4> MemInsns;
1564
1565  for (unsigned Count = 0; MBBI != E && Count < Limit;
1566  MBBI = next_nodbg(MBBI, E)) {
1567  MachineInstr &MI = *MBBI;
1568 
1569  UsedInBetween.accumulate(MI);
1570 
1571  // Don't count transient instructions towards the search limit since there
1572  // may be different numbers of them if e.g. debug information is present.
1573  if (!MI.isTransient())
1574  ++Count;
1575 
1576  Flags.setSExtIdx(-1);
1577  if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
1578  getLdStOffsetOp(MI).isImm()) {
1579  assert(MI.mayLoadOrStore() && "Expected memory operation.");
1580  // If we've found another instruction with the same opcode, check to see
1581  // if the base and offset are compatible with our starting instruction.
1582  // These instructions all have scaled immediate operands, so we just
1583  // check for +1/-1. Make sure to check the new instruction offset is
1584  // actually an immediate and not a symbolic reference destined for
1585  // a relocation.
1586  Register MIBaseReg = getLdStBaseOp(MI).getReg();
1587  int MIOffset = getLdStOffsetOp(MI).getImm();
1588  bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
1589  if (IsUnscaled != MIIsUnscaled) {
1590  // We're trying to pair instructions that differ in how they are scaled.
1591  // If FirstMI is scaled then scale the offset of MI accordingly.
1592  // Otherwise, do the opposite (i.e., make MI's offset unscaled).
1593  int MemSize = TII->getMemScale(MI);
1594  if (MIIsUnscaled) {
1595  // If the unscaled offset isn't a multiple of the MemSize, we can't
1596  // pair the operations together: bail and keep looking.
1597  if (MIOffset % MemSize) {
1598  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1599  UsedRegUnits, TRI);
1600  MemInsns.push_back(&MI);
1601  continue;
1602  }
1603  MIOffset /= MemSize;
1604  } else {
1605  MIOffset *= MemSize;
1606  }
1607  }
1608 
1609  bool IsPreLdSt = isPreLdStPairCandidate(FirstMI, MI);
1610 
1611  if (BaseReg == MIBaseReg) {
1612  // If the offset of the second ld/st is not equal to the size of the
1613  // destination register, it can't be paired with a pre-index ld/st
1614  // pair. Additionally, if the base reg is used or modified, the
1615  // operations can't be paired: bail and keep looking.
1616  if (IsPreLdSt) {
1617  bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
1618  bool IsBaseRegUsed =
1619  !UsedRegUnits.available(getLdStBaseOp(MI).getReg());
1620  bool IsBaseRegModified =
1621  !ModifiedRegUnits.available(getLdStBaseOp(MI).getReg());
1622  // If the stored value and the address of the second instruction are
1623  // the same, the second instruction needs to use the updated register
1624  // and therefore it must not be folded.
1625  bool IsMIRegTheSame = TRI->regsOverlap(getLdStRegOp(MI).getReg(),
1626  getLdStBaseOp(MI).getReg());
1627  if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
1628  IsMIRegTheSame) {
1629  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1630  UsedRegUnits, TRI);
1631  MemInsns.push_back(&MI);
1632  continue;
1633  }
1634  } else {
1635  if ((Offset != MIOffset + OffsetStride) &&
1636  (Offset + OffsetStride != MIOffset)) {
1637  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1638  UsedRegUnits, TRI);
1639  MemInsns.push_back(&MI);
1640  continue;
1641  }
1642  }
1643 
1644  int MinOffset = Offset < MIOffset ? Offset : MIOffset;
1645  if (FindNarrowMerge) {
1646  // If the alignment requirements of the scaled wide load/store
1647  // instruction can't express the offset of the scaled narrow input,
1648  // bail and keep looking. For promotable zero stores, allow only when
1649  // the stored value is the same (i.e., WZR).
1650  if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
1651  (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
1652  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1653  UsedRegUnits, TRI);
1654  MemInsns.push_back(&MI);
1655  continue;
1656  }
1657  } else {
1658  // Pairwise instructions have a 7-bit signed offset field. Single
1659  // insns have a 12-bit unsigned offset field. If the resultant
1660  // immediate offset of merging these instructions is out of range for
1661  // a pairwise instruction, bail and keep looking.
1662  if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
1663  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1664  UsedRegUnits, TRI);
1665  MemInsns.push_back(&MI);
1666  continue;
1667  }
1668  // If the alignment requirements of the paired (scaled) instruction
1669  // can't express the offset of the unscaled input, bail and keep
1670  // looking.
1671  if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
1672  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1673  UsedRegUnits, TRI);
1674  MemInsns.push_back(&MI);
1675  continue;
1676  }
1677  }
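// A minimal illustration, assuming the 64-bit scaled forms: the pairwise
// immediate covers units -64..63 (byte offsets -512..504), so LDRXui loads
// at units 63 and 64 can merge into "ldp xA, xB, [xN, #504]", while loads at
// units 64 and 65 are rejected by inBoundsForPair above.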
1678  // If the destination register of one load is the same as, or a sub/super
1679  // register of, the other load's destination, bail and keep looking. A
1680  // load-pair instruction with both destination registers the same is
1681  // UNPREDICTABLE and will result in an exception.
1682  if (MayLoad &&
1683  TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg())) {
1684  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
1685  TRI);
1686  MemInsns.push_back(&MI);
1687  continue;
1688  }
1689 
1690  // If the BaseReg has been modified, then we cannot do the optimization.
1691  // For example, in the following pattern
1692  // ldr x1 [x2]
1693  // ldr x2 [x3]
1694  // ldr x4 [x2, #8],
1695  // the first and third ldr cannot be converted to ldp x1, x4, [x2]
1696  if (!ModifiedRegUnits.available(BaseReg))
1697  return E;
1698 
1699  // If the Rt of the second instruction was not modified or used between
1700  // the two instructions and none of the instructions between the second
1701  // and first alias with the second, we can combine the second into the
1702  // first.
1703  if (ModifiedRegUnits.available(getLdStRegOp(MI).getReg()) &&
1704  !(MI.mayLoad() &&
1705  !UsedRegUnits.available(getLdStRegOp(MI).getReg())) &&
1706  !mayAlias(MI, MemInsns, AA)) {
1707 
1708  Flags.setMergeForward(false);
1709  Flags.clearRenameReg();
1710  return MBBI;
1711  }
1712 
1713  // Likewise, if the Rt of the first instruction is not modified or used
1714  // between the two instructions and none of the instructions between the
1715  // first and the second alias with the first, we can combine the first
1716  // into the second.
1717  if (!(MayLoad &&
1718  !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) &&
1719  !mayAlias(FirstMI, MemInsns, AA)) {
1720 
1721  if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
1722  Flags.setMergeForward(true);
1723  Flags.clearRenameReg();
1724  return MBBI;
1725  }
1726 
1727  if (DebugCounter::shouldExecute(RegRenamingCounter)) {
1728  if (!MaybeCanRename)
1729  MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween,
1730  RequiredClasses, TRI)};
1731 
1732  if (*MaybeCanRename) {
1733  Optional<MCPhysReg> MaybeRenameReg = tryToFindRegisterToRename(
1734  *FirstMI.getParent()->getParent(), Reg, DefinedInBB,
1735  UsedInBetween, RequiredClasses, TRI);
1736  if (MaybeRenameReg) {
1737  Flags.setRenameReg(*MaybeRenameReg);
1738  Flags.setMergeForward(true);
1739  MBBIWithRenameReg = MBBI;
1740  }
1741  }
1742  }
1743  }
1744  // Unable to combine these instructions due to interference in between.
1745  // Keep looking.
1746  }
1747  }
1748 
1749  if (Flags.getRenameReg())
1750  return MBBIWithRenameReg;
1751 
1752  // The instruction wasn't a matching load or store. Stop searching if we
1753  // encounter a call instruction that might modify memory.
1754  if (MI.isCall())
1755  return E;
1756 
1757  // Update modified / used register units.
1758  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1759 
1760  // Otherwise, if the base register is modified, we have no match, so
1761  // return early.
1762  if (!ModifiedRegUnits.available(BaseReg))
1763  return E;
1764 
1765  // Update list of instructions that read/write memory.
1766  if (MI.mayLoadOrStore())
1767  MemInsns.push_back(&MI);
1768  }
1769  return E;
1770 }
1771 
1772 static MachineBasicBlock::iterator
1773 maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {
1774  auto End = MI.getParent()->end();
1775  if (MaybeCFI == End ||
1776  MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
1777  !(MI.getFlag(MachineInstr::FrameSetup) ||
1778  MI.getFlag(MachineInstr::FrameDestroy)) ||
1779  getLdStBaseOp(MI).getReg() != AArch64::SP)
1780  return End;
1781 
1782  const MachineFunction &MF = *MI.getParent()->getParent();
1783  unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
1784  const MCCFIInstruction &CFI = MF.getFrameInstructions()[CFIIndex];
1785  switch (CFI.getOperation()) {
1786  case MCCFIInstruction::OpDefCfa:
1787  case MCCFIInstruction::OpDefCfaOffset:
1788  return MaybeCFI;
1789  default:
1790  return End;
1791  }
1792 }
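// An illustrative scenario for maybeMoveCFI, assuming a frame-destroy
// sequence: when "add sp, sp, #16" (FrameDestroy) is followed by a
// ".cfi_def_cfa_offset" directive and the add is folded into a pre-indexed
// load/store, the CFI instruction is spliced to just after the merged
// instruction so that it still follows the SP adjustment.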
1793 
1794 MachineBasicBlock::iterator
1795 AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
1796  MachineBasicBlock::iterator Update,
1797  bool IsPreIdx) {
1798  assert((Update->getOpcode() == AArch64::ADDXri ||
1799  Update->getOpcode() == AArch64::SUBXri) &&
1800  "Unexpected base register update instruction to merge!");
1801  MachineBasicBlock::iterator E = I->getParent()->end();
1802  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
1803 
1804  // If we are updating the SP and the following instruction is a CFA-offset-
1805  // related CFI instruction, move it after the merged instruction.
1806  MachineBasicBlock::iterator CFI =
1807  IsPreIdx ? maybeMoveCFI(*Update, next_nodbg(Update, E)) : E;
1808 
1809  // Return the instruction following the merged instruction, which is
1810  // the instruction following our unmerged load. Unless that's the add/sub
1811  // instruction we're merging, in which case it's the one after that.
1812  if (NextI == Update)
1813  NextI = next_nodbg(NextI, E);
1814 
1815  int Value = Update->getOperand(2).getImm();
1816  assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
1817  "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
1818  if (Update->getOpcode() == AArch64::SUBXri)
1819  Value = -Value;
1820 
1821  unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
1822  : getPostIndexedOpcode(I->getOpcode());
1823  MachineInstrBuilder MIB;
1824  int Scale, MinOffset, MaxOffset;
1825  getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
1826  if (!isPairedLdSt(*I)) {
1827  // Non-paired instruction.
1828  MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
1829  .add(getLdStRegOp(*Update))
1830  .add(getLdStRegOp(*I))
1831  .add(getLdStBaseOp(*I))
1832  .addImm(Value / Scale)
1833  .setMemRefs(I->memoperands())
1834  .setMIFlags(I->mergeFlagsWith(*Update));
1835  } else {
1836  // Paired instruction.
1837  MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
1838  .add(getLdStRegOp(*Update))
1839  .add(getLdStRegOp(*I, 0))
1840  .add(getLdStRegOp(*I, 1))
1841  .add(getLdStBaseOp(*I))
1842  .addImm(Value / Scale)
1843  .setMemRefs(I->memoperands())
1844  .setMIFlags(I->mergeFlagsWith(*Update));
1845  }
1846  if (CFI != E) {
1847  MachineBasicBlock *MBB = I->getParent();
1848  MBB->splice(std::next(MIB.getInstr()->getIterator()), MBB, CFI);
1849  }
1850 
1851  if (IsPreIdx) {
1852  ++NumPreFolded;
1853  LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store.");
1854  } else {
1855  ++NumPostFolded;
1856  LLVM_DEBUG(dbgs() << "Creating post-indexed load/store.");
1857  }
1858  LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
1859  LLVM_DEBUG(I->print(dbgs()));
1860  LLVM_DEBUG(dbgs() << " ");
1861  LLVM_DEBUG(Update->print(dbgs()));
1862  LLVM_DEBUG(dbgs() << " with instruction:\n ");
1863  LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1864  LLVM_DEBUG(dbgs() << "\n");
1865 
1866  // Erase the old instructions for the block.
1867  I->eraseFromParent();
1868  Update->eraseFromParent();
1869 
1870  return NextI;
1871 }
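// A sketch of the two merge directions handled above: a post-index merge
// turns "ldp x0, x1, [sp]" + "add sp, sp, #16" into "ldp x0, x1, [sp], #16",
// while a pre-index merge turns "sub x2, x2, #8" + "ldr x3, [x2]" into
// "ldr x3, [x2, #-8]!" (for SUBXri the immediate is negated before scaling).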
1872 
1873 bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
1874  MachineInstr &MI,
1875  unsigned BaseReg, int Offset) {
1876  switch (MI.getOpcode()) {
1877  default:
1878  break;
1879  case AArch64::SUBXri:
1880  case AArch64::ADDXri:
1881  // Make sure it's a vanilla immediate operand, not a relocation or
1882  // anything else we can't handle.
1883  if (!MI.getOperand(2).isImm())
1884  break;
1885  // Watch out for 1 << 12 shifted value.
1886  if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
1887  break;
1888 
1889  // The update instruction source and destination register must be the
1890  // same as the load/store base register.
1891  if (MI.getOperand(0).getReg() != BaseReg ||
1892  MI.getOperand(1).getReg() != BaseReg)
1893  break;
1894 
1895  int UpdateOffset = MI.getOperand(2).getImm();
1896  if (MI.getOpcode() == AArch64::SUBXri)
1897  UpdateOffset = -UpdateOffset;
1898 
1899  // The immediate must be a multiple of the scaling factor of the pre/post
1900  // indexed instruction.
1901  int Scale, MinOffset, MaxOffset;
1902  getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
1903  if (UpdateOffset % Scale != 0)
1904  break;
1905 
1906  // Scaled offset must fit in the instruction immediate.
1907  int ScaledOffset = UpdateOffset / Scale;
1908  if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
1909  break;
1910 
1911  // If we have a non-zero Offset, check that it matches the amount
1912  // we're adding to the register; a zero Offset matches any update amount.
1913  if (!Offset || Offset == UpdateOffset)
1914  return true;
1915  break;
1916  }
1917  return false;
1918 }
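// For example, with MemMI = "ldr x0, [x1]" (the non-paired pre/post-indexed
// forms take an unscaled imm9, i.e. Scale 1 and range -256..255),
// "add x1, x1, #32" is a matching update, whereas "add x1, x1, #1, lsl #12"
// (shifted immediate) and "add x1, x2, #32" (source register differs from
// the base) are rejected above.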
1919 
1920 static bool needsWinCFI(const MachineFunction *MF) {
1921  return MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
1922  MF->getFunction().needsUnwindTableEntry();
1923 }
1924 
1925 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
1926  MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
1927  MachineBasicBlock::iterator E = I->getParent()->end();
1928  MachineInstr &MemMI = *I;
1929  MachineBasicBlock::iterator MBBI = I;
1930 
1931  Register BaseReg = getLdStBaseOp(MemMI).getReg();
1932  int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * TII->getMemScale(MemMI);
1933 
1934  // Scan forward looking for post-index opportunities. Updating instructions
1935  // can't be formed if the memory instruction doesn't have the offset we're
1936  // looking for.
1937  if (MIUnscaledOffset != UnscaledOffset)
1938  return E;
1939 
1940  // If the base register overlaps a source/destination register, we can't
1941  // merge the update. This does not apply to tag store instructions, which
1942  // ignore the address part of the source register.
1943  // Nor does it apply to STGPi, which, unlike normal stores, does not have
1944  // unpredictable behavior in this case and always performs writeback after
1945  // reading the source register value.
1946  if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
1947  bool IsPairedInsn = isPairedLdSt(MemMI);
1948  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
1949  Register DestReg = getLdStRegOp(MemMI, i).getReg();
1950  if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
1951  return E;
1952  }
1953  }
1954 
1955  // Track which register units have been modified and used between the first
1956  // insn (inclusive) and the second insn.
1957  ModifiedRegUnits.clear();
1958  UsedRegUnits.clear();
1959  MBBI = next_nodbg(MBBI, E);
1960 
1961  // We can't post-increment the stack pointer if any instruction between
1962  // the memory access (I) and the increment (MBBI) can access the memory
1963  // region defined by [SP, MBBI].
1964  const bool BaseRegSP = BaseReg == AArch64::SP;
1965  if (BaseRegSP && needsWinCFI(I->getMF())) {
1966  // FIXME: For now, we always block the optimization over SP on Windows
1967  // targets, as it would require adjusting the unwind/debug info; messing up
1968  // the unwind info can actually cause a miscompile.
1969  return E;
1970  }
1971 
1972  for (unsigned Count = 0; MBBI != E && Count < Limit;
1973  MBBI = next_nodbg(MBBI, E)) {
1974  MachineInstr &MI = *MBBI;
1975 
1976  // Don't count transient instructions towards the search limit since there
1977  // may be different numbers of them if e.g. debug information is present.
1978  if (!MI.isTransient())
1979  ++Count;
1980 
1981  // If we found a match, return it.
1982  if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
1983  return MBBI;
1984 
1985  // Update the status of what the instruction clobbered and used.
1986  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1987 
1988  // Otherwise, if the base register is used or modified, we have no match, so
1989  // return early.
1990  // If we are optimizing SP, do not allow instructions that may load or store
1991  // in between the load and the optimized value update.
1992  if (!ModifiedRegUnits.available(BaseReg) ||
1993  !UsedRegUnits.available(BaseReg) ||
1994  (BaseRegSP && MBBI->mayLoadOrStore()))
1995  return E;
1996  }
1997  return E;
1998 }
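// E.g., "ldr x0, [sp]" followed by "str x1, [x8]" and then
// "add sp, sp, #16" is not rewritten to the post-indexed
// "ldr x0, [sp], #16": the fold would deallocate the stack before the
// intervening store, which might target that region through x8.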
1999 
2000 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
2001  MachineBasicBlock::iterator I, unsigned Limit) {
2002  MachineBasicBlock::iterator B = I->getParent()->begin();
2003  MachineBasicBlock::iterator E = I->getParent()->end();
2004  MachineInstr &MemMI = *I;
2005  MachineBasicBlock::iterator MBBI = I;
2006  MachineFunction &MF = *MemMI.getMF();
2007 
2008  Register BaseReg = getLdStBaseOp(MemMI).getReg();
2009  int Offset = getLdStOffsetOp(MemMI).getImm();
2010 
2011  // If the load/store is the first instruction in the block, there's obviously
2012  // not any matching update. Ditto if the memory offset isn't zero.
2013  if (MBBI == B || Offset != 0)
2014  return E;
2015  // If the base register overlaps a destination register, we can't
2016  // merge the update.
2017  if (!isTagStore(MemMI)) {
2018  bool IsPairedInsn = isPairedLdSt(MemMI);
2019  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
2020  Register DestReg = getLdStRegOp(MemMI, i).getReg();
2021  if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
2022  return E;
2023  }
2024  }
2025 
2026  const bool BaseRegSP = BaseReg == AArch64::SP;
2027  if (BaseRegSP && needsWinCFI(I->getMF())) {
2028  // FIXME: For now, we always block the optimization over SP on Windows
2029  // targets, as it would require adjusting the unwind/debug info; messing up
2030  // the unwind info can actually cause a miscompile.
2031  return E;
2032  }
2033 
2034  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
2035  unsigned RedZoneSize =
2036  Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());
2037 
2038  // Track which register units have been modified and used between the first
2039  // insn (inclusive) and the second insn.
2040  ModifiedRegUnits.clear();
2041  UsedRegUnits.clear();
2042  unsigned Count = 0;
2043  bool MemAccessBeforeSPPreInc = false;
2044  do {
2045  MBBI = prev_nodbg(MBBI, B);
2046  MachineInstr &MI = *MBBI;
2047 
2048  // Don't count transient instructions towards the search limit since there
2049  // may be different numbers of them if e.g. debug information is present.
2050  if (!MI.isTransient())
2051  ++Count;
2052 
2053  // If we found a match, return it.
2054  if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
2055  // Check that the update value is within our red zone limit (which may be
2056  // zero).
2057  if (MemAccessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)
2058  return E;
2059  return MBBI;
2060  }
2061 
2062  // Update the status of what the instruction clobbered and used.
2063  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2064 
2065  // Otherwise, if the base register is used or modified, we have no match, so
2066  // return early.
2067  if (!ModifiedRegUnits.available(BaseReg) ||
2068  !UsedRegUnits.available(BaseReg))
2069  return E;
2070  // Keep track of whether we have a memory access before an SP pre-increment;
2071  // in that case we need to validate later that the update amount respects
2072  // the red zone.
2073  if (BaseRegSP && MBBI->mayLoadOrStore())
2074  MemAccessBeforeSPPreInc = true;
2075  } while (MBBI != B && Count < Limit);
2076  return E;
2077 }
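// E.g., scanning backward from "ldr x0, [sp]", a preceding
// "sub sp, sp, #32" can be folded into the pre-indexed "ldr x0, [sp, #-32]!"
// unless some load/store occurs between the two; in that case the fold is
// only allowed when the 32-byte adjustment fits within the red zone.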
2078 
2079 bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
2080  MachineBasicBlock::iterator &MBBI) {
2081  MachineInstr &MI = *MBBI;
2082  // If this is a volatile load, don't mess with it.
2083  if (MI.hasOrderedMemoryRef())
2084  return false;
2085 
2086  // Make sure this is a reg+imm.
2087  // FIXME: It is possible to extend it to handle reg+reg cases.
2088  if (!getLdStOffsetOp(MI).isImm())
2089  return false;
2090 
2091  // Look backward up to LdStLimit instructions.
2092  MachineBasicBlock::iterator StoreI;
2093  if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
2094  ++NumLoadsFromStoresPromoted;
2095  // Promote the load. Keeping the iterator straight is a
2096  // pain, so we let the merge routine tell us what the next instruction
2097  // is after it's done mucking about.
2098  MBBI = promoteLoadFromStore(MBBI, StoreI);
2099  return true;
2100  }
2101  return false;
2102 }
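// E.g., "str x1, [x0]" followed by "ldr x2, [x0]" is promoted to
// "str x1, [x0]" + "mov x2, x1"; when the store is wider than the load,
// the mov becomes a bitfield extract of the stored value instead.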
2103 
2104 // Merge adjacent zero stores into a wider store.
2105 bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
2106  MachineBasicBlock::iterator &MBBI) {
2107  assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
2108  MachineInstr &MI = *MBBI;
2109  MachineBasicBlock::iterator E = MI.getParent()->end();
2110 
2111  if (!TII->isCandidateToMergeOrPair(MI))
2112  return false;
2113 
2114  // Look ahead up to LdStLimit instructions for a mergeable instruction.
2115  LdStPairFlags Flags;
2116  MachineBasicBlock::iterator MergeMI =
2117  findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
2118  if (MergeMI != E) {
2119  ++NumZeroStoresPromoted;
2120 
2121  // Keeping the iterator straight is a pain, so we let the merge routine tell
2122  // us what the next instruction is after it's done mucking about.
2123  MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
2124  return true;
2125  }
2126  return false;
2127 }
2128 
2129 // Find loads and stores that can be merged into a single load or store pair
2130 // instruction.
2131 bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
2132  MachineInstr &MI = *MBBI;
2133  MachineBasicBlock::iterator E = MI.getParent()->end();
2134 
2135  if (!TII->isCandidateToMergeOrPair(MI))
2136  return false;
2137 
2138  // Early exit if the offset is not possible to match. (6 bits of positive
2139  // range, plus allow an extra one in case we find a later insn that matches
2140  // with Offset-1)
2141  bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
2142  int Offset = getLdStOffsetOp(MI).getImm();
2143  int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
2144  // Allow one more for offset.
2145  if (Offset > 0)
2146  Offset -= OffsetStride;
2147  if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
2148  return false;
2149 
2150  // Look ahead up to LdStLimit instructions for a pairable instruction.
2151  LdStPairFlags Flags;
2152  MachineBasicBlock::iterator Paired =
2153  findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
2154  if (Paired != E) {
2155  ++NumPairCreated;
2156  if (TII->hasUnscaledLdStOffset(MI))
2157  ++NumUnscaledPairCreated;
2158  // Keeping the iterator straight is a pain, so we let the merge routine tell
2159  // us what the next instruction is after it's done mucking about.
2160  auto Prev = std::prev(MBBI);
2161  MBBI = mergePairedInsns(MBBI, Paired, Flags);
2162  // Collect liveness info for instructions between Prev and the new position
2163  // MBBI.
2164  for (auto I = std::next(Prev); I != MBBI; I++)
2165  updateDefinedRegisters(*I, DefinedInBB, TRI);
2166 
2167  return true;
2168  }
2169  return false;
2170 }
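// E.g., "ldr x0, [x2, #512]" (scaled immediate 64) is outside pairwise
// range on its own, but the early exit above keeps it as a candidate: a
// later "ldr x1, [x2, #504]" (immediate 63) still allows the pair to be
// anchored at the lower offset, giving "ldp x1, x0, [x2, #504]".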
2171 
2172 bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
2173  (MachineBasicBlock::iterator &MBBI) {
2174  MachineInstr &MI = *MBBI;
2175  MachineBasicBlock::iterator E = MI.getParent()->end();
2176  MachineBasicBlock::iterator Update;
2177 
2178  // Look forward to try to form a post-index instruction. For example,
2179  // ldr x0, [x20]
2180  // add x20, x20, #32
2181  // merged into:
2182  // ldr x0, [x20], #32
2183  Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
2184  if (Update != E) {
2185  // Merge the update into the ld/st.
2186  MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
2187  return true;
2188  }
2189 
2190  // Don't know how to handle unscaled pre/post-index versions below, so bail.
2191  if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
2192  return false;
2193 
2194  // Look back to try to find a pre-index instruction. For example,
2195  // add x0, x0, #8
2196  // ldr x1, [x0]
2197  // merged into:
2198  // ldr x1, [x0, #8]!
2199  Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
2200  if (Update != E) {
2201  // Merge the update into the ld/st.
2202  MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
2203  return true;
2204  }
2205 
2206  // The immediate in the load/store is scaled by the size of the memory
2207  // operation. The immediate in the add we're looking for,
2208  // however, is not, so adjust here.
2209  int UnscaledOffset = getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);
2210 
2211  // Look forward to try to find a pre-index instruction. For example,
2212  // ldr x1, [x0, #64]
2213  // add x0, x0, #64
2214  // merged into:
2215  // ldr x1, [x0, #64]!
2216  Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
2217  if (Update != E) {
2218  // Merge the update into the ld/st.
2219  MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
2220  return true;
2221  }
2222 
2223  return false;
2224 }
2225 
2226 bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
2227  bool EnableNarrowZeroStOpt) {
2228 
2229  bool Modified = false;
2230  // Four transformations to do here:
2231  // 1) Find loads that directly read from stores and promote them by
2232  // replacing with mov instructions. If the store is wider than the load,
2233  // the load will be replaced with a bitfield extract.
2234  // e.g.,
2235  // str w1, [x0, #4]
2236  // ldrh w2, [x0, #6]
2237  // ; becomes
2238  // str w1, [x0, #4]
2239  // lsr w2, w1, #16
2240  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2241  MBBI != E;) {
2242  if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
2243  Modified = true;
2244  else
2245  ++MBBI;
2246  }
2247  // 2) Merge adjacent zero stores into a wider store.
2248  // e.g.,
2249  // strh wzr, [x0]
2250  // strh wzr, [x0, #2]
2251  // ; becomes
2252  // str wzr, [x0]
2253  // e.g.,
2254  // str wzr, [x0]
2255  // str wzr, [x0, #4]
2256  // ; becomes
2257  // str xzr, [x0]
2258  if (EnableNarrowZeroStOpt)
2259  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2260  MBBI != E;) {
2261  if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
2262  Modified = true;
2263  else
2264  ++MBBI;
2265  }
2266  // 3) Find loads and stores that can be merged into a single load or store
2267  // pair instruction.
2268  // e.g.,
2269  // ldr x0, [x2]
2270  // ldr x1, [x2, #8]
2271  // ; becomes
2272  // ldp x0, x1, [x2]
2273 
2274  if (MBB.getParent()->getRegInfo().tracksLiveness()) {
2275  DefinedInBB.clear();
2276  DefinedInBB.addLiveIns(MBB);
2277  }
2278 
2279  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2280  MBBI != E;) {
2281  // Track currently live registers up to this point, to help with
2282  // searching for a rename register on demand.
2283  updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
2284  if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
2285  Modified = true;
2286  else
2287  ++MBBI;
2288  }
2289  // 4) Find base register updates that can be merged into the load or store
2290  // as a base-reg writeback.
2291  // e.g.,
2292  // ldr x0, [x2]
2293  // add x2, x2, #4
2294  // ; becomes
2295  // ldr x0, [x2], #4
2296  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2297  MBBI != E;) {
2298  if (isMergeableLdStUpdate(*MBBI) && tryToMergeLdStUpdate(MBBI))
2299  Modified = true;
2300  else
2301  ++MBBI;
2302  }
2303 
2304  return Modified;
2305 }
2306 
2307 bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2308  if (skipFunction(Fn.getFunction()))
2309  return false;
2310 
2311  Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
2312  TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
2313  TRI = Subtarget->getRegisterInfo();
2314  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2315 
2316  // Resize the modified and used register unit trackers. We do this once
2317  // per function and then clear the register units each time we optimize a load
2318  // or store.
2319  ModifiedRegUnits.init(*TRI);
2320  UsedRegUnits.init(*TRI);
2321  DefinedInBB.init(*TRI);
2322 
2323  bool Modified = false;
2324  bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
2325  for (auto &MBB : Fn) {
2326  auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
2327  Modified |= M;
2328  }
2329 
2330  return Modified;
2331 }
2332 
2333 // FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads and
2334 // stores near one another? Note: The pre-RA instruction scheduler already has
2335 // hooks to try and schedule pairable loads/stores together to improve pairing
2336 // opportunities. Thus, a pre-RA pairing pass may not be worth the effort.
2337 
2338 // FIXME: When pairing store instructions it's very possible for this pass to
2339 // hoist a store with a KILL marker above another use (without a KILL marker).
2340 // The resulting IR is invalid, but nothing uses the KILL markers after this
2341 // pass, so it's never caused a problem in practice.
2342 
2343 /// createAArch64LoadStoreOptimizationPass - returns an instance of the
2344 /// load / store optimization pass.
2345 FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
2346  return new AArch64LoadStoreOpt();
2347 }