//===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
// The pass runs after the PrologEpilogInserter where we emit the CFI
// instructions. In order to preserve the correctness of the unwind information,
// the pass should not change the order of any two instructions, one of which
// has the FrameSetup/FrameDestroy flag, or, alternatively, it must apply an
// ad-hoc fix to the unwind information.
//
//===----------------------------------------------------------------------===//
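//
// Illustrative sketch of the rewrites performed below (not exhaustive, and
// the exact opcodes chosen depend on operand sizes and offsets):
//
//   Pairing:           str w0, [x2]; str w1, [x2, #4]     => stp w0, w1, [x2]
//   Zero-store merge:  strh wzr, [x0]; strh wzr, [x0, #2] => str wzr, [x0]
//   Load-from-store:   str w0, [x2]; ldr w1, [x2]         => str w0, [x2]; mov w1, w0
//   Update folding:    add x2, x2, #4; str w0, [x2]       => str w0, [x2, #4]!
//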

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <limits>
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "aarch64-ldst-opt"

STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
          "Number of load/store from unscaled generated");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");

DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
              "Controls which pairs are considered for renaming");

// The LdStLimit limits how far we search for load/store pairs.
static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
                                   cl::init(20), cl::Hidden);

// The UpdateLimit limits how far we search for update instructions when we form
// pre-/post-index instructions.
static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
                                     cl::Hidden);

// Enable register renaming to find additional store pairing opportunities.
static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
                                    cl::init(true), cl::Hidden);

#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"

namespace {

using LdStPairFlags = struct LdStPairFlags {
  // If a matching instruction is found, MergeForward is set to true if the
  // merge is to remove the first instruction and replace the second with
  // a pair-wise insn, and false if the reverse is true.
  bool MergeForward = false;

  // SExtIdx gives the index of the result of the load pair that must be
  // extended. The value of SExtIdx assumes that the paired load produces the
  // value in this order: (I, returned iterator), i.e., -1 means no value has
  // to be extended, 0 means I, and 1 means the returned iterator.
  int SExtIdx = -1;

  // If not none, RenameReg can be used to rename the result register of the
  // first store in a pair. Currently this only works when merging stores
  // forward.
  std::optional<MCPhysReg> RenameReg = std::nullopt;

  LdStPairFlags() = default;

  void setMergeForward(bool V = true) { MergeForward = V; }
  bool getMergeForward() const { return MergeForward; }

  void setSExtIdx(int V) { SExtIdx = V; }
  int getSExtIdx() const { return SExtIdx; }

  void setRenameReg(MCPhysReg R) { RenameReg = R; }
  void clearRenameReg() { RenameReg = std::nullopt; }
  std::optional<MCPhysReg> getRenameReg() const { return RenameReg; }
};

struct AArch64LoadStoreOpt : public MachineFunctionPass {
  static char ID;

  AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
    initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
  }

  AliasAnalysis *AA;
  const AArch64InstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const AArch64Subtarget *Subtarget;

  // Track which register units have been modified and used.
  LiveRegUnits ModifiedRegUnits, UsedRegUnits;
  LiveRegUnits DefinedInBB;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  // Scan the instructions looking for a load/store that can be combined
  // with the current instruction into a load/store pair.
  // Return the matching instruction if one is found, else MBB->end().
  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
                                               LdStPairFlags &Flags,
                                               unsigned Limit,
                                               bool FindNarrowMerge);

  // Scan the instructions looking for a store that writes to the address from
  // which the current load instruction reads. Return true if one is found.
  bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
                         MachineBasicBlock::iterator &StoreI);

  // Merge the two narrow stores indicated into a single wider store
  // instruction.
  MachineBasicBlock::iterator
  mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                        MachineBasicBlock::iterator MergeMI,
                        const LdStPairFlags &Flags);

  // Merge the two instructions indicated into a single pair-wise instruction.
  MachineBasicBlock::iterator
  mergePairedInsns(MachineBasicBlock::iterator I,
                   MachineBasicBlock::iterator Paired,
                   const LdStPairFlags &Flags);

  // Promote the load that reads directly from the address stored to.
  MachineBasicBlock::iterator
  promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                       MachineBasicBlock::iterator StoreI);

  // Scan the instruction list to find a base register update that can
  // be combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback. Scan forwards.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
                                int UnscaledOffset, unsigned Limit);

  // Scan the instruction list to find a base register update that can
  // be combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback. Scan backwards.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);

  // Find an instruction that updates the base register of the ld/st
  // instruction.
  bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
                            unsigned BaseReg, int Offset);

  // Merge a pre- or post-index base register update into a ld/st instruction.
  MachineBasicBlock::iterator
  mergeUpdateInsn(MachineBasicBlock::iterator I,
                  MachineBasicBlock::iterator Update, bool IsPreIdx);

  // Find and merge zero store instructions.
  bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);

  // Find and pair ldr/str instructions.
  bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);

  // Find and promote load instructions which read directly from a store.
  bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);

  // Find and merge base register updates before or after a ld/st instruction.
  bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);

  bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);

  bool runOnMachineFunction(MachineFunction &Fn) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
};

char AArch64LoadStoreOpt::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
                AARCH64_LOAD_STORE_OPT_NAME, false, false)

static bool isNarrowStore(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
    return true;
  }
}

// These instructions set a memory tag and either keep the memory contents
// unchanged or set them to zero, ignoring the address part of the source
// register.
static bool isTagStore(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::STGOffset:
  case AArch64::STZGOffset:
  case AArch64::ST2GOffset:
  case AArch64::STZ2GOffset:
    return true;
  }
}

static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                         bool *IsValidLdStrOpc = nullptr) {
  if (IsValidLdStrOpc)
    *IsValidLdStrOpc = true;
  switch (Opc) {
  default:
    if (IsValidLdStrOpc)
      *IsValidLdStrOpc = false;
    return std::numeric_limits<unsigned>::max();
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRDpre:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRQpre:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
  case AArch64::STRWui:
  case AArch64::STRWpre:
  case AArch64::STURWi:
  case AArch64::STRXui:
  case AArch64::STRXpre:
  case AArch64::STURXi:
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRDpre:
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDRQpre:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::LDRWpre:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::LDRXpre:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::STRSpre:
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSpre:
    return Opc;
  case AArch64::LDRSWui:
    return AArch64::LDRWui;
  case AArch64::LDURSWi:
    return AArch64::LDURWi;
  }
}

static unsigned getMatchingWideOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no wide equivalent!");
  case AArch64::STRBBui:
    return AArch64::STRHHui;
  case AArch64::STRHHui:
    return AArch64::STRWui;
  case AArch64::STURBBi:
    return AArch64::STURHHi;
  case AArch64::STURHHi:
    return AArch64::STURWi;
  case AArch64::STURWi:
    return AArch64::STURXi;
  case AArch64::STRWui:
    return AArch64::STRXui;
  }
}
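
// For example (an illustrative sketch, not from the original source):
// mergeNarrowZeroStores below uses this mapping to rewrite
//   strh wzr, [x0]
//   strh wzr, [x0, #2]
// into the single wider store
//   str wzr, [x0]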

static unsigned getMatchingPairOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pairwise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STPSi;
  case AArch64::STRSpre:
    return AArch64::STPSpre;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STPDi;
  case AArch64::STRDpre:
    return AArch64::STPDpre;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STPQi;
  case AArch64::STRQpre:
    return AArch64::STPQpre;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STPWi;
  case AArch64::STRWpre:
    return AArch64::STPWpre;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STPXi;
  case AArch64::STRXpre:
    return AArch64::STPXpre;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDPSi;
  case AArch64::LDRSpre:
    return AArch64::LDPSpre;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDPDi;
  case AArch64::LDRDpre:
    return AArch64::LDPDpre;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDPQi;
  case AArch64::LDRQpre:
    return AArch64::LDPQpre;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDPWi;
  case AArch64::LDRWpre:
    return AArch64::LDPWpre;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDPXi;
  case AArch64::LDRXpre:
    return AArch64::LDPXpre;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return AArch64::LDPSWi;
  }
}
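
// For example (illustrative): two adjacent scaled loads
//   ldr x0, [x2]
//   ldr x1, [x2, #8]
// map through LDRXui -> LDPXi and become
//   ldp x0, x1, [x2]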

static bool isMatchingStore(MachineInstr &LoadInst,
                            MachineInstr &StoreInst) {
  unsigned LdOpc = LoadInst.getOpcode();
  unsigned StOpc = StoreInst.getOpcode();
  switch (LdOpc) {
  default:
    llvm_unreachable("Unsupported load instruction!");
  case AArch64::LDRBBui:
    return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
           StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURBBi:
    return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
           StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRHHui:
    return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
           StOpc == AArch64::STRXui;
  case AArch64::LDURHHi:
    return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
           StOpc == AArch64::STURXi;
  case AArch64::LDRWui:
    return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURWi:
    return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRXui:
    return StOpc == AArch64::STRXui;
  case AArch64::LDURXi:
    return StOpc == AArch64::STURXi;
  }
}

static unsigned getPreIndexedOpcode(unsigned Opc) {
  // FIXME: We don't currently support creating pre-indexed loads/stores when
  // the load or store is the unscaled version. If we decide to perform such an
  // optimization in the future the cases for the unscaled loads/stores will
  // need to be added here.
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pre-indexed equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpre;
  case AArch64::STRDui:
    return AArch64::STRDpre;
  case AArch64::STRQui:
    return AArch64::STRQpre;
  case AArch64::STRBBui:
    return AArch64::STRBBpre;
  case AArch64::STRHHui:
    return AArch64::STRHHpre;
  case AArch64::STRWui:
    return AArch64::STRWpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  case AArch64::LDRSui:
    return AArch64::LDRSpre;
  case AArch64::LDRDui:
    return AArch64::LDRDpre;
  case AArch64::LDRQui:
    return AArch64::LDRQpre;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpre;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpre;
  case AArch64::LDRWui:
    return AArch64::LDRWpre;
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpre;
  case AArch64::LDPSi:
    return AArch64::LDPSpre;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpre;
  case AArch64::LDPDi:
    return AArch64::LDPDpre;
  case AArch64::LDPQi:
    return AArch64::LDPQpre;
  case AArch64::LDPWi:
    return AArch64::LDPWpre;
  case AArch64::LDPXi:
    return AArch64::LDPXpre;
  case AArch64::STPSi:
    return AArch64::STPSpre;
  case AArch64::STPDi:
    return AArch64::STPDpre;
  case AArch64::STPQi:
    return AArch64::STPQpre;
  case AArch64::STPWi:
    return AArch64::STPWpre;
  case AArch64::STPXi:
    return AArch64::STPXpre;
  case AArch64::STGOffset:
    return AArch64::STGPreIndex;
  case AArch64::STZGOffset:
    return AArch64::STZGPreIndex;
  case AArch64::ST2GOffset:
    return AArch64::ST2GPreIndex;
  case AArch64::STZ2GOffset:
    return AArch64::STZ2GPreIndex;
  case AArch64::STGPi:
    return AArch64::STGPpre;
  }
}

static unsigned getPostIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no post-indexed equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STRSpost;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STRDpost;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STRQpost;
  case AArch64::STRBBui:
    return AArch64::STRBBpost;
  case AArch64::STRHHui:
    return AArch64::STRHHpost;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STRWpost;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STRXpost;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDRSpost;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDRDpost;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDRQpost;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpost;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpost;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDRWpost;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDRXpost;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpost;
  case AArch64::LDPSi:
    return AArch64::LDPSpost;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpost;
  case AArch64::LDPDi:
    return AArch64::LDPDpost;
  case AArch64::LDPQi:
    return AArch64::LDPQpost;
  case AArch64::LDPWi:
    return AArch64::LDPWpost;
  case AArch64::LDPXi:
    return AArch64::LDPXpost;
  case AArch64::STPSi:
    return AArch64::STPSpost;
  case AArch64::STPDi:
    return AArch64::STPDpost;
  case AArch64::STPQi:
    return AArch64::STPQpost;
  case AArch64::STPWi:
    return AArch64::STPWpost;
  case AArch64::STPXi:
    return AArch64::STPXpost;
  case AArch64::STGOffset:
    return AArch64::STGPostIndex;
  case AArch64::STZGOffset:
    return AArch64::STZGPostIndex;
  case AArch64::ST2GOffset:
    return AArch64::ST2GPostIndex;
  case AArch64::STZ2GOffset:
    return AArch64::STZ2GPostIndex;
  case AArch64::STGPi:
    return AArch64::STGPpost;
  }
}

static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {

  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  switch (OpcA) {
  default:
    return false;
  case AArch64::STRSpre:
    return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
  case AArch64::STRDpre:
    return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
  case AArch64::STRQpre:
    return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
  case AArch64::STRWpre:
    return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
  case AArch64::STRXpre:
    return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
  case AArch64::LDRSpre:
    return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
  case AArch64::LDRDpre:
    return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
  case AArch64::LDRQpre:
    return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
  case AArch64::LDRWpre:
    return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
  case AArch64::LDRXpre:
    return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
  }
}

// Returns the scale and offset range of pre/post indexed variants of MI.
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
                                       int &MinOffset, int &MaxOffset) {
  bool IsPaired = AArch64InstrInfo::isPairedLdSt(MI);
  bool IsTagStore = isTagStore(MI);
  // ST*G and all paired ldst have the same scale in pre/post-indexed variants
  // as in the "unsigned offset" variant.
  // All other pre/post indexed ldst instructions are unscaled.
  Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;

  if (IsPaired) {
    MinOffset = -64;
    MaxOffset = 63;
  } else {
    MinOffset = -256;
    MaxOffset = 255;
  }
}
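
// For example (illustrative): LDPXi has Scale == 8, so its pre/post-indexed
// variants accept byte offsets in [-512, 504] (i.e. [-64, 63] * 8), while a
// plain LDRXpre is unscaled (Scale == 1) and accepts byte offsets in
// [-256, 255].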

static MachineOperand &getLdStRegOp(MachineInstr &MI,
                                    unsigned PairedRegOp = 0) {
  assert(PairedRegOp < 2 && "Unexpected register operand idx.");
  bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
  if (IsPreLdSt)
    PairedRegOp += 1;
  unsigned Idx =
      AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
  return MI.getOperand(Idx);
}

static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
                                  MachineInstr &StoreInst,
                                  const AArch64InstrInfo *TII) {
  assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
  int LoadSize = TII->getMemScale(LoadInst);
  int StoreSize = TII->getMemScale(StoreInst);
  int UnscaledStOffset =
      TII->hasUnscaledLdStOffset(StoreInst)
          ? AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm()
          : AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm() * StoreSize;
  int UnscaledLdOffset =
      TII->hasUnscaledLdStOffset(LoadInst)
          ? AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm()
          : AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm() * LoadSize;
  return (UnscaledStOffset <= UnscaledLdOffset) &&
         (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
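
// For example (illustrative, byte offsets): a 4-byte load at offset 8 is in
// range of an 8-byte store at offset 8 (8 <= 8 && 8 + 4 <= 8 + 8), whereas a
// 4-byte load at offset 12 is not in range of a 4-byte store at offset 8
// (12 + 4 > 8 + 4).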

static bool isPromotableZeroStoreInst(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
          isNarrowStore(Opc)) &&
         getLdStRegOp(MI).getReg() == AArch64::WZR;
}

static bool isPromotableLoadFromStore(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  // Unscaled instructions.
  case AArch64::LDURBBi:
  case AArch64::LDURHHi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
    return true;
  }
}

static bool isMergeableLdStUpdate(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::STRHHui:
  case AArch64::STRBBui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
  case AArch64::LDRHHui:
  case AArch64::LDRBBui:
  case AArch64::STGOffset:
  case AArch64::STZGOffset:
  case AArch64::ST2GOffset:
  case AArch64::STZ2GOffset:
  case AArch64::STGPi:
  // Unscaled instructions.
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  // Paired instructions.
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:
  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
  case AArch64::STPWi:
  case AArch64::STPXi:
    // Make sure this is a reg+imm (as opposed to an address reloc).
    if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
      return false;

    return true;
  }
}

MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                                           MachineBasicBlock::iterator MergeMI,
                                           const LdStPairFlags &Flags) {
  assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
         "Expected promotable zero stores.");

  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further. Either way, the merge will invalidate the iterator,
  // and we don't need to scan the new instruction, as it's a pairwise
  // instruction, which we're not considering for further action anyway.
  if (NextI == MergeMI)
    NextI = next_nodbg(NextI, E);

  unsigned Opc = I->getOpcode();
  bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
  int OffsetStride = IsScaled ? 1 : TII->getMemScale(*I);

  bool MergeForward = Flags.getMergeForward();
  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
  // MergeForward also determines from where we copy the base register operand,
  // so that we get flags compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI)
                   : AArch64InstrInfo::getLdStBaseOp(*I);

  // Which register is Rt and which is Rt2 depends on the offset order.
  MachineInstr *RtMI;
  if (AArch64InstrInfo::getLdStOffsetOp(*I).getImm() ==
      AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() + OffsetStride)
    RtMI = &*MergeMI;
  else
    RtMI = &*I;

  int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
  // Change the scaled offset from small to large type.
  if (IsScaled) {
    assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
    OffsetImm /= 2;
  }

  // Construct the new instruction.
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineInstrBuilder MIB;
  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
            .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
            .add(BaseRegOp)
            .addImm(OffsetImm)
            .cloneMergedMemRefs({&*I, &*MergeMI})
            .setMIFlags(I->mergeFlagsWith(*MergeMI));
  (void)MIB;

  LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n    ");
  LLVM_DEBUG(I->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(MergeMI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "  with instruction:\n    ");
  LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
  LLVM_DEBUG(dbgs() << "\n");

  // Erase the old instructions.
  I->eraseFromParent();
  MergeMI->eraseFromParent();
  return NextI;
}
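
// For example (illustrative): for the two promotable zero stores
//   strh wzr, [x0, #4]    ; STRHHui, scaled offset 2
//   strh wzr, [x0, #6]    ; STRHHui, scaled offset 3
// the code above emits the single wider store
//   str wzr, [x0, #4]     ; STRWui, scaled offset 1 (== 2 / 2)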

// Apply Fn to all instructions between MI and the beginning of the block, until
// a def for DefReg is reached. Returns true iff Fn returns true for all
// visited instructions. Stops after visiting Limit iterations.
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
                              const TargetRegisterInfo *TRI, unsigned Limit,
                              std::function<bool(MachineInstr &, bool)> &Fn) {
  auto MBB = MI.getParent();
  for (MachineInstr &I :
       instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
    if (!Limit)
      return false;
    --Limit;

    bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
      return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
             TRI->regsOverlap(MOP.getReg(), DefReg);
    });
    if (!Fn(I, isDef))
      return false;
    if (isDef)
      break;
  }
  return true;
}

static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
                                   const TargetRegisterInfo *TRI) {

  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && MOP.isKill())
      Units.removeReg(MOP.getReg());

  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && !MOP.isKill())
      Units.addReg(MOP.getReg());
}

MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator Paired,
                                      const LdStPairFlags &Flags) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further. Either way, the merge will invalidate the iterator,
  // and we don't need to scan the new instruction, as it's a pairwise
  // instruction, which we're not considering for further action anyway.
  if (NextI == Paired)
    NextI = next_nodbg(NextI, E);

  int SExtIdx = Flags.getSExtIdx();
  unsigned Opc =
      SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
  bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
  int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;

  bool MergeForward = Flags.getMergeForward();

  std::optional<MCPhysReg> RenameReg = Flags.getRenameReg();
  if (MergeForward && RenameReg) {
    MCRegister RegToRename = getLdStRegOp(*I).getReg();
    DefinedInBB.addReg(*RenameReg);

    // Return the sub/super register for RenameReg, matching the size of
    // OriginalReg.
    auto GetMatchingSubReg = [this,
                              RenameReg](MCPhysReg OriginalReg) -> MCPhysReg {
      for (MCPhysReg SubOrSuper : TRI->sub_and_superregs_inclusive(*RenameReg))
        if (TRI->getMinimalPhysRegClass(OriginalReg) ==
            TRI->getMinimalPhysRegClass(SubOrSuper))
          return SubOrSuper;
      llvm_unreachable("Should have found matching sub or super register!");
    };

    std::function<bool(MachineInstr &, bool)> UpdateMIs =
        [this, RegToRename, GetMatchingSubReg](MachineInstr &MI, bool IsDef) {
          if (IsDef) {
            bool SeenDef = false;
            for (auto &MOP : MI.operands()) {
              // Rename the first explicit definition and all implicit
              // definitions matching RegToRename.
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  (!SeenDef || (MOP.isDef() && MOP.isImplicit())) &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                MOP.setReg(GetMatchingSubReg(MOP.getReg()));
                SeenDef = true;
              }
            }
          } else {
            for (auto &MOP : MI.operands()) {
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                MOP.setReg(GetMatchingSubReg(MOP.getReg()));
              }
            }
          }
          LLVM_DEBUG(dbgs() << "Renamed " << MI << "\n");
          return true;
        };
    forAllMIsUntilDef(*I, RegToRename, TRI, LdStLimit, UpdateMIs);

#if !defined(NDEBUG)
    // Make sure the register used for renaming is not used between the paired
    // instructions. That would trash the content before the new paired
    // instruction.
    for (auto &MI :
         iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>(
             std::next(I), std::next(Paired)))
      assert(all_of(MI.operands(),
                    [this, &RenameReg](const MachineOperand &MOP) {
                      return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
                             MOP.isUndef() ||
                             !TRI->regsOverlap(MOP.getReg(), *RenameReg);
                    }) &&
             "Rename register used between paired instruction, trashing the "
             "content");
#endif
  }

  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
  // MergeForward also determines from where we copy the base register operand,
  // so that we get flags compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired)
                   : AArch64InstrInfo::getLdStBaseOp(*I);

  int Offset = AArch64InstrInfo::getLdStOffsetOp(*I).getImm();
  int PairedOffset = AArch64InstrInfo::getLdStOffsetOp(*Paired).getImm();
  bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
  if (IsUnscaled != PairedIsUnscaled) {
    // We're trying to pair instructions that differ in how they are scaled. If
    // I is scaled then scale the offset of Paired accordingly. Otherwise, do
    // the opposite (i.e., make Paired's offset unscaled).
    int MemSize = TII->getMemScale(*Paired);
    if (PairedIsUnscaled) {
      // If the unscaled offset isn't a multiple of the MemSize, we can't
      // pair the operations together.
      assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
             "Offset should be a multiple of the stride!");
      PairedOffset /= MemSize;
    } else {
      PairedOffset *= MemSize;
    }
  }

  // Which register is Rt and which is Rt2 depends on the offset order.
  // However, for pre-indexed load/stores, Rt should be the register of the
  // pre-indexed instruction.
  MachineInstr *RtMI, *Rt2MI;
  if (Offset == PairedOffset + OffsetStride &&
      !AArch64InstrInfo::isPreLdSt(*I)) {
    RtMI = &*Paired;
    Rt2MI = &*I;
    // Here we swapped the assumption made for SExtIdx.
    // I.e., we turn ldp I, Paired into ldp Paired, I.
    // Update the index accordingly.
    if (SExtIdx != -1)
      SExtIdx = (SExtIdx + 1) % 2;
  } else {
    RtMI = &*I;
    Rt2MI = &*Paired;
  }
  int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
  // Scale the immediate offset, if necessary.
  if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
    assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
           "Unscaled offset cannot be scaled.");
    OffsetImm /= TII->getMemScale(*RtMI);
  }

  // Construct the new instruction.
  MachineInstrBuilder MIB;
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineOperand RegOp0 = getLdStRegOp(*RtMI);
  MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
  // Kill flags may become invalid when moving stores for pairing.
  if (RegOp0.isUse()) {
    if (!MergeForward) {
      // Clear kill flags on store if moving upwards. Example:
      //   STRWui %w0, ...
      //   USE %w1
      //   STRWui kill %w1  ; need to clear kill flag when moving STRWui upwards
      RegOp0.setIsKill(false);
      RegOp1.setIsKill(false);
    } else {
      // Clear kill flags of the first store's register. Example:
      //   STRWui %w1, ...
      //   USE kill %w1  ; need to clear kill flag when moving STRWui downwards
      //   STRW %w0
      Register Reg = getLdStRegOp(*I).getReg();
      for (MachineInstr &MI : make_range(std::next(I), Paired))
        MI.clearRegisterKills(Reg, TRI);
    }
  }

  unsigned int MatchPairOpcode = getMatchingPairOpcode(Opc);
  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(MatchPairOpcode));

  // Adds the pre-index operand for pre-indexed ld/st pairs.
  if (AArch64InstrInfo::isPreLdSt(*RtMI))
    MIB.addReg(BaseRegOp.getReg(), RegState::Define);

  MIB.add(RegOp0)
      .add(RegOp1)
      .add(BaseRegOp)
      .addImm(OffsetImm)
      .cloneMergedMemRefs({&*I, &*Paired})
      .setMIFlags(I->mergeFlagsWith(*Paired));

  (void)MIB;

  LLVM_DEBUG(
      dbgs() << "Creating pair load/store. Replacing instructions:\n    ");
  LLVM_DEBUG(I->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(Paired->print(dbgs()));
  LLVM_DEBUG(dbgs() << "  with instruction:\n    ");
  if (SExtIdx != -1) {
    // Generate the sign extension for the proper result of the ldp.
    // I.e., with X1, that would be:
    //   %w1 = KILL %w1, implicit-def %x1
    //   %x1 = SBFMXri killed %x1, 0, 31
    MachineOperand &DstMO = MIB->getOperand(SExtIdx);
    // Right now, DstMO has the extended register, since it comes from an
    // extended opcode.
    Register DstRegX = DstMO.getReg();
    // Get the W variant of that register.
    Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
    // Update the result of LDP to use the W instead of the X variant.
    DstMO.setReg(DstRegW);
    LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
    LLVM_DEBUG(dbgs() << "\n");
    // Make the machine verifier happy by providing a definition for
    // the X register.
    // Insert this definition right after the generated LDP, i.e., before
    // InsertionPoint.
    MachineInstrBuilder MIBKill =
        BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
            .addReg(DstRegW)
            .addReg(DstRegX, RegState::Define);
    MIBKill->getOperand(2).setImplicit();
    // Create the sign extension.
    MachineInstrBuilder MIBSXTW =
        BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
            .addReg(DstRegX)
            .addImm(0)
            .addImm(31);
    (void)MIBSXTW;
    LLVM_DEBUG(dbgs() << "  Extend operand:\n    ");
    LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
  } else {
    LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
  }
  LLVM_DEBUG(dbgs() << "\n");

  if (MergeForward)
    for (const MachineOperand &MOP : phys_regs_and_masks(*I))
      if (MOP.isReg() && MOP.isKill())
        DefinedInBB.addReg(MOP.getReg());

  // Erase the old instructions.
  I->eraseFromParent();
  Paired->eraseFromParent();

  return NextI;
}
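
// For example (illustrative): pairing a sign-extending with a zero-extending
// load (SExtIdx != -1) produces a plain pair plus a re-created extension:
//   ldrsw x0, [x2]
//   ldr   w1, [x2, #4]
// becomes
//   ldp  w0, w1, [x2]
//   KILL w0, implicit-def x0
//   sbfm x0, x0, #0, #31    ; sign-extend the 32-bit result into x0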

MachineBasicBlock::iterator
AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                                          MachineBasicBlock::iterator StoreI) {
  MachineBasicBlock::iterator NextI =
      next_nodbg(LoadI, LoadI->getParent()->end());

  int LoadSize = TII->getMemScale(*LoadI);
  int StoreSize = TII->getMemScale(*StoreI);
  Register LdRt = getLdStRegOp(*LoadI).getReg();
  const MachineOperand &StMO = getLdStRegOp(*StoreI);
  Register StRt = getLdStRegOp(*StoreI).getReg();
  bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);

  assert((IsStoreXReg ||
          TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
         "Unexpected RegClass");

  MachineInstr *BitExtMI;
  if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
    // Remove the load if its destination register is the same register as the
    // stored value.
    if (StRt == LdRt && LoadSize == 8) {
      for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                         LoadI->getIterator())) {
        if (MI.killsRegister(StRt, TRI)) {
          MI.clearRegisterKills(StRt, TRI);
          break;
        }
      }
      LLVM_DEBUG(dbgs() << "Remove load instruction:\n    ");
      LLVM_DEBUG(LoadI->print(dbgs()));
      LLVM_DEBUG(dbgs() << "\n");
      LoadI->eraseFromParent();
      return NextI;
    }
    // Replace the load with a mov if the load and store are the same size.
    BitExtMI =
        BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
            .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
            .add(StMO)
            .addImm(0)
            .setMIFlags(LoadI->getFlags());
  } else {
    // FIXME: Currently we disable this transformation in big-endian targets as
    // performance and correctness are verified only in little-endian.
    if (!Subtarget->isLittleEndian())
      return NextI;
    bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
    assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
           "Unsupported ld/st match");
    assert(LoadSize <= StoreSize && "Invalid load size");
    int UnscaledLdOffset =
        IsUnscaled
            ? AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm()
            : AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() * LoadSize;
    int UnscaledStOffset =
        IsUnscaled
            ? AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm()
            : AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() * StoreSize;
    int Width = LoadSize * 8;
    Register DestReg =
        IsStoreXReg ? Register(TRI->getMatchingSuperReg(
                          LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
                    : LdRt;

    assert((UnscaledLdOffset >= UnscaledStOffset &&
            (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
           "Invalid offset");

    int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
    int Imms = Immr + Width - 1;
    if (UnscaledLdOffset == UnscaledStOffset) {
      uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
                                | ((Immr) << 6)               // immr
                                | ((Imms) << 0);              // imms

      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
                  DestReg)
              .add(StMO)
              .addImm(AndMaskEncoded)
              .setMIFlags(LoadI->getFlags());
    } else {
      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
                  DestReg)
              .add(StMO)
              .addImm(Immr)
              .addImm(Imms)
              .setMIFlags(LoadI->getFlags());
    }
  }

  // Clear kill flags between store and load.
  for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                     BitExtMI->getIterator()))
    if (MI.killsRegister(StRt, TRI)) {
      MI.clearRegisterKills(StRt, TRI);
      break;
    }

  LLVM_DEBUG(dbgs() << "Promoting load by replacing :\n    ");
  LLVM_DEBUG(StoreI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(LoadI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "  with instructions:\n    ");
  LLVM_DEBUG(StoreI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG((BitExtMI)->print(dbgs()));
  LLVM_DEBUG(dbgs() << "\n");

  // Erase the old instructions.
  LoadI->eraseFromParent();
  return NextI;
}
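
// For example (illustrative): for a same-size store/load pair
//   str w0, [sp, #8]
//   ldr w1, [sp, #8]
// the load is replaced with a register move
//   mov w1, w0              ; emitted as ORRWrs w1, wzr, w0, #0
// while a narrower load fully covered by a wider store becomes an AND/UBFM
// bitfield extract of the stored register, as computed above.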

static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  if (IsUnscaled) {
    // If the byte-offset isn't a multiple of the stride, there's no point
    // trying to match it.
    if (Offset % OffsetStride)
      return false;
    Offset /= OffsetStride;
  }
  return Offset <= 63 && Offset >= -64;
}
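
// For example (illustrative, 8-byte accesses, so OffsetStride == 8 when
// unscaled):
//   inBoundsForPair(true, 24, 8)   -> 24 / 8 == 3, in [-64, 63]  -> true
//   inBoundsForPair(true, 20, 8)   -> 20 not a multiple of 8     -> false
//   inBoundsForPair(false, 100, 1) -> 100 > 63                   -> false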

// Do alignment, specialized to power of 2 and for signed ints,
// avoiding having to do a C-style cast from uint64_t to int when
// using alignTo from include/llvm/Support/MathExtras.h.
// FIXME: Move this function to include/MathExtras.h?
static int alignTo(int Num, int PowOf2) {
  return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}
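
// For example (illustrative): alignTo(5, 4) == 8, alignTo(8, 4) == 8, and
// alignTo(-3, 4) == 0, which also covers the negative offsets the unsigned
// MathExtras variant cannot take.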

static bool mayAlias(MachineInstr &MIa,
                     SmallVectorImpl<MachineInstr *> &MemInsns,
                     AliasAnalysis *AA) {
  for (MachineInstr *MIb : MemInsns)
    if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false))
      return true;

  return false;
}

bool AArch64LoadStoreOpt::findMatchingStore(
    MachineBasicBlock::iterator I, unsigned Limit,
    MachineBasicBlock::iterator &StoreI) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator MBBI = I;
  MachineInstr &LoadMI = *I;
  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(LoadMI).getReg();

  // If the load is the first instruction in the block, there's obviously
  // not any matching store.
  if (MBBI == B)
    return false;

  // Track which register units have been modified and used between the first
  // insn and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  unsigned Count = 0;
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // If the load instruction reads directly from the address to which the
    // store instruction writes and the stored value is not modified, we can
    // promote the load. Since we do not handle stores with pre-/post-index,
    // it's unnecessary to check if BaseReg is modified by the store itself.
    // Also we can't handle stores without an immediate offset operand,
    // as the operand might be the address of a global variable.
    if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
        BaseReg == AArch64InstrInfo::getLdStBaseOp(MI).getReg() &&
        AArch64InstrInfo::getLdStOffsetOp(MI).isImm() &&
        isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
        ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
      StoreI = MBBI;
      return true;
    }

    if (MI.isCall())
      return false;

    // Update modified / used register units.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (!ModifiedRegUnits.available(BaseReg))
      return false;

    // If we encounter a store aliased with the load, return early.
    if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA*/ false))
      return false;
  } while (MBBI != B && Count < Limit);
  return false;
}

static bool needsWinCFI(const MachineFunction *MF) {
  return MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
         MF->getFunction().needsUnwindTableEntry();
}

// Returns true if FirstMI and MI are candidates for merging or pairing.
// Otherwise, returns false.
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
                                       LdStPairFlags &Flags,
                                       const AArch64InstrInfo *TII) {
  // If this is volatile or if pairing is suppressed, not a candidate.
  if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
    return false;

  // We should have already checked FirstMI for pair suppression and volatility.
  assert(!FirstMI.hasOrderedMemoryRef() &&
         !TII->isLdStPairSuppressed(FirstMI) &&
         "FirstMI shouldn't get here if either of these checks are true.");

  if (needsWinCFI(MI.getMF()) && (MI.getFlag(MachineInstr::FrameSetup) ||
                                  MI.getFlag(MachineInstr::FrameDestroy)))
    return false;

  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  // Opcodes match: If the opcodes are pre ld/st there is nothing more to check.
  if (OpcA == OpcB)
    return !AArch64InstrInfo::isPreLdSt(FirstMI);

  // Try to match a sign-extended load/store with a zero-extended load/store.
  bool IsValidLdStrOpc, PairIsValidLdStrOpc;
  unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
  assert(IsValidLdStrOpc &&
         "Given Opc should be a Load or Store with an immediate");
  // OpcA will be the first instruction in the pair.
  if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
    Flags.setSExtIdx(NonSExtOpc == (unsigned)OpcA ? 1 : 0);
    return true;
  }

  // If the second instruction isn't even a mergable/pairable load/store, bail
  // out.
  if (!PairIsValidLdStrOpc)
    return false;

  // FIXME: We don't support merging narrow stores with mixed scaled/unscaled
  // offsets.
  if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
    return false;

  // The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and
  // LDR<S,D,Q,W,X>pre - LDR<S,D,Q,W,X>ui
  // are candidate pairs that can be merged.
  if (isPreLdStPairCandidate(FirstMI, MI))
    return true;

  // Try to match an unscaled load/store with a scaled load/store.
  return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) &&
         getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB);

  // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
}
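
// For example (illustrative): isPreLdStPairCandidate above accepts
//   str x0, [x1, #8]!    ; STRXpre
//   str x2, [x1, #8]     ; STRXui
// which mergePairedInsns can turn into
//   stp x0, x2, [x1, #8]!   ; STPXpre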

static bool
canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
                 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
                 const TargetRegisterInfo *TRI) {
  if (!FirstMI.mayStore())
    return false;

  // Check if we can find an unused register which we can use to rename
  // the register used by the first load/store.
  auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
  MachineFunction &MF = *FirstMI.getParent()->getParent();
  if (!RegClass || !MF.getRegInfo().tracksLiveness())
    return false;

  auto RegToRename = getLdStRegOp(FirstMI).getReg();
  // For now, we only rename if the store operand gets killed at the store.
  if (!getLdStRegOp(FirstMI).isKill() &&
      !any_of(FirstMI.operands(),
              [TRI, RegToRename](const MachineOperand &MOP) {
                return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                       MOP.isImplicit() && MOP.isKill() &&
                       TRI->regsOverlap(RegToRename, MOP.getReg());
              })) {
    LLVM_DEBUG(dbgs() << "  Operand not killed at " << FirstMI << "\n");
    return false;
  }
  auto canRenameMOP = [TRI](const MachineOperand &MOP) {
    if (MOP.isReg()) {
      auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
      // Renaming registers with multiple disjunct sub-registers (e.g. the
      // result of a LD3) means that all sub-registers are renamed, potentially
      // impacting other instructions we did not check. Bail out.
      // Note that this relies on the structure of the AArch64 register file. In
      // particular, a subregister cannot be written without overwriting the
      // whole register.
      if (RegClass->HasDisjunctSubRegs) {
        LLVM_DEBUG(
            dbgs()
            << "  Cannot rename operands with multiple disjunct subregisters ("
            << MOP << ")\n");
        return false;
      }
    }
    return MOP.isImplicit() ||
           (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
  };

  bool FoundDef = false;

  // For each instruction between FirstMI and the previous def for RegToRename,
  // we
  // * check if we can rename RegToRename in this instruction
  // * collect the registers used and required register classes for RegToRename.
  std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
                                                           bool IsDef) {
    LLVM_DEBUG(dbgs() << "Checking " << MI << "\n");
    // Currently we do not try to rename across frame-setup instructions.
    if (MI.getFlag(MachineInstr::FrameSetup)) {
      LLVM_DEBUG(dbgs() << "  Cannot rename framesetup instructions currently ("
                        << MI << ")\n");
      return false;
    }

    UsedInBetween.accumulate(MI);

    // For a definition, check that we can rename the definition and exit the
    // loop.
    FoundDef = IsDef;

    // For defs, check if we can rename the first def of RegToRename.
    if (FoundDef) {
      // For some pseudo instructions, we might not generate code in the end
      // (e.g. KILL) and we would end up without a correct def for the rename
      // register.
      // TODO: This might be overly conservative and we could handle those cases
      // in multiple ways:
      //   1. Insert an extra copy, to materialize the def.
      //   2. Skip pseudo-defs until we find a non-pseudo def.
      if (MI.isPseudo()) {
        LLVM_DEBUG(dbgs() << "  Cannot rename pseudo instruction " << MI
                          << "\n");
        return false;
      }

      for (auto &MOP : MI.operands()) {
        if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||
            !TRI->regsOverlap(MOP.getReg(), RegToRename))
          continue;
        if (!canRenameMOP(MOP)) {
          LLVM_DEBUG(dbgs()
                     << "  Cannot rename " << MOP << " in " << MI << "\n");
          return false;
        }
        RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
      }
      return true;
    } else {
      for (auto &MOP : MI.operands()) {
        if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
            !TRI->regsOverlap(MOP.getReg(), RegToRename))
          continue;

        if (!canRenameMOP(MOP)) {
          LLVM_DEBUG(dbgs()
                     << "  Cannot rename " << MOP << " in " << MI << "\n");
          return false;
        }
        RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
      }
    }
    return true;
  };

  if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
    return false;

  if (!FoundDef) {
    LLVM_DEBUG(dbgs() << "  Did not find definition for register in BB\n");
    return false;
  }
  return true;
}

// Check if we can find a physical register for renaming \p Reg. This register
// must:
// * not be defined already in \p DefinedInBB; DefinedInBB must contain all
//   defined registers up to the point where the renamed register will be used,
// * not be used in \p UsedInBetween; UsedInBetween must contain all accessed
//   registers in the range the rename register will be used,
// * be available in all used register classes (checked using RequiredClasses).
static std::optional<MCPhysReg> tryToFindRegisterToRename(
    const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  const MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Checks if any sub- or super-register of PR is callee saved.
  auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
    return any_of(TRI->sub_and_superregs_inclusive(PR),
                  [&MF, TRI](MCPhysReg SubOrSuper) {
                    return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
                  });
  };

  // Check if PR or one of its sub- or super-registers can be used for all
  // required register classes.
  auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
    return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
      return any_of(TRI->sub_and_superregs_inclusive(PR),
                    [C, TRI](MCPhysReg SubOrSuper) {
                      return C == TRI->getMinimalPhysRegClass(SubOrSuper);
                    });
    });
  };

  auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
  for (const MCPhysReg &PR : *RegClass) {
    if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
        !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
        CanBeUsedForAllClasses(PR)) {
      DefinedInBB.addReg(PR);
      LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
                        << "\n");
      return {PR};
    }
  }
  LLVM_DEBUG(dbgs() << "No rename register found from "
                    << TRI->getRegClassName(RegClass) << "\n");
  return std::nullopt;
}
1514 
1515 /// Scan the instructions looking for a load/store that can be combined with the
1516 /// current instruction into a wider equivalent or a load/store pair.
1518 AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
1519  LdStPairFlags &Flags, unsigned Limit,
1520  bool FindNarrowMerge) {
1521  MachineBasicBlock::iterator E = I->getParent()->end();
1523  MachineBasicBlock::iterator MBBIWithRenameReg;
1524  MachineInstr &FirstMI = *I;
1525  MBBI = next_nodbg(MBBI, E);
1526 
1527  bool MayLoad = FirstMI.mayLoad();
1528  bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
1529  Register Reg = getLdStRegOp(FirstMI).getReg();
1530  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(FirstMI).getReg();
1532  int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
1533  bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
1534 
1535  std::optional<bool> MaybeCanRename;
1536  if (!EnableRenaming)
1537  MaybeCanRename = {false};
1538 
1540  LiveRegUnits UsedInBetween;
1541  UsedInBetween.init(*TRI);
1542 
1543  Flags.clearRenameReg();
1544 
1545  // Track which register units have been modified and used between the first
1546  // insn (inclusive) and the second insn.
1547  ModifiedRegUnits.clear();
1548  UsedRegUnits.clear();
1549 
1550  // Remember any instructions that read/write memory between FirstMI and MI.
1552 
1553  for (unsigned Count = 0; MBBI != E && Count < Limit;
1554  MBBI = next_nodbg(MBBI, E)) {
1555  MachineInstr &MI = *MBBI;
1556 
1557  UsedInBetween.accumulate(MI);
1558 
1559  // Don't count transient instructions towards the search limit since there
1560  // may be different numbers of them if e.g. debug information is present.
1561  if (!MI.isTransient())
1562  ++Count;
1563 
1564  Flags.setSExtIdx(-1);
1565  if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
1567  assert(MI.mayLoadOrStore() && "Expected memory operation.");
1568  // If we've found another instruction with the same opcode, check to see
1569  // if the base and offset are compatible with our starting instruction.
1570  // These instructions all have scaled immediate operands, so we just
1571  // check for +1/-1. Make sure to check the new instruction offset is
1572  // actually an immediate and not a symbolic reference destined for
1573  // a relocation.
1575  int MIOffset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
1576  bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
1577  if (IsUnscaled != MIIsUnscaled) {
1578  // We're trying to pair instructions that differ in how they are scaled.
1579  // If FirstMI is scaled then scale the offset of MI accordingly.
1580  // Otherwise, do the opposite (i.e., make MI's offset unscaled).
1581  int MemSize = TII->getMemScale(MI);
1582  if (MIIsUnscaled) {
1583  // If the unscaled offset isn't a multiple of the MemSize, we can't
1584  // pair the operations together: bail and keep looking.
1585  if (MIOffset % MemSize) {
1586  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1587  UsedRegUnits, TRI);
1588  MemInsns.push_back(&MI);
1589  continue;
1590  }
1591  MIOffset /= MemSize;
1592  } else {
1593  MIOffset *= MemSize;
1594  }
1595  }
1596 
1597  bool IsPreLdSt = isPreLdStPairCandidate(FirstMI, MI);
1598 
1599  if (BaseReg == MIBaseReg) {
1600  // If the offset of the second ld/st is not equal to the size of the
1601  // destination register it can’t be paired with a pre-index ld/st
1602  // pair. Additionally if the base reg is used or modified the operations
1603  // can't be paired: bail and keep looking.
1604  if (IsPreLdSt) {
1605  bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
1606  bool IsBaseRegUsed = !UsedRegUnits.available(
1608  bool IsBaseRegModified = !ModifiedRegUnits.available(
1610  // If the stored value and the address of the second instruction is
1611  // the same, it needs to be using the updated register and therefore
1612  // it must not be folded.
1613  bool IsMIRegTheSame =
1616  if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
1617  IsMIRegTheSame) {
1618  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1619  UsedRegUnits, TRI);
1620  MemInsns.push_back(&MI);
1621  continue;
1622  }
1623  } else {
1624  if ((Offset != MIOffset + OffsetStride) &&
1625  (Offset + OffsetStride != MIOffset)) {
1626  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1627  UsedRegUnits, TRI);
1628  MemInsns.push_back(&MI);
1629  continue;
1630  }
1631  }
1632 
1633  int MinOffset = Offset < MIOffset ? Offset : MIOffset;
1634  if (FindNarrowMerge) {
1635  // If the alignment requirements of the scaled wide load/store
1636  // instruction can't express the offset of the scaled narrow input,
1637  // bail and keep looking. For promotable zero stores, allow only when
1638  // the stored value is the same (i.e., WZR).
1639  if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
1640  (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
1641  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1642  UsedRegUnits, TRI);
1643  MemInsns.push_back(&MI);
1644  continue;
1645  }
1646  } else {
1647  // Pairwise instructions have a 7-bit signed offset field. Single
1648  // insns have a 12-bit unsigned offset field. If the resultant
1649  // immediate offset of merging these instructions is out of range for
1650  // a pairwise instruction, bail and keep looking.
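// Illustrative example (not from the original source): for 8-byte
// accesses, the pair's 7-bit signed scaled field covers byte offsets
// -512..504, whereas a single scaled ldr/str covers 0..32760, so two
// loads at #1024 and #1032 cannot be turned into an ldp.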
1651  if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
1652  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1653  UsedRegUnits, TRI);
1654  MemInsns.push_back(&MI);
1655  continue;
1656  }
1657  // If the alignment requirements of the paired (scaled) instruction
1658  // can't express the offset of the unscaled input, bail and keep
1659  // looking.
1660  if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
1661  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1662  UsedRegUnits, TRI);
1663  MemInsns.push_back(&MI);
1664  continue;
1665  }
1666  }
1667  // If the destination register of one load is the same register or a
1668  // sub/super register of the other load, bail and keep looking. A
1669  // load-pair instruction with both destination registers the same is
1670  // UNPREDICTABLE and will result in an exception.
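// Illustrative example (not from the original source): merging
// "ldr x3, [x1]" with "ldr x3, [x1, #8]" would produce
// "ldp x3, x3, [x1]", whose identical destination registers make it
// UNPREDICTABLE, so such candidates are skipped here.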
1671  if (MayLoad &&
1672  TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg())) {
1673  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
1674  TRI);
1675  MemInsns.push_back(&MI);
1676  continue;
1677  }
1678 
1679  // If the BaseReg has been modified, then we cannot do the optimization.
1680  // For example, in the following pattern
1681  // ldr x1 [x2]
1682  // ldr x2 [x3]
1683  // ldr x4 [x2, #8],
1684  // the first and third ldr cannot be converted to ldp x1, x4, [x2]
1685  if (!ModifiedRegUnits.available(BaseReg))
1686  return E;
1687 
1688  // If the Rt of the second instruction was not modified or used between
1689  // the two instructions and none of the instructions between the second
1690  // and first alias with the second, we can combine the second into the
1691  // first.
1692  if (ModifiedRegUnits.available(getLdStRegOp(MI).getReg()) &&
1693  !(MI.mayLoad() &&
1694  !UsedRegUnits.available(getLdStRegOp(MI).getReg())) &&
1695  !mayAlias(MI, MemInsns, AA)) {
1696 
1697  Flags.setMergeForward(false);
1698  Flags.clearRenameReg();
1699  return MBBI;
1700  }
1701 
1702  // Likewise, if the Rt of the first instruction is not modified or used
1703  // between the two instructions and none of the instructions between the
1704  // first and the second alias with the first, we can combine the first
1705  // into the second.
1706  if (!(MayLoad &&
1707  !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) &&
1708  !mayAlias(FirstMI, MemInsns, AA)) {
1709 
1710  if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
1711  Flags.setMergeForward(true);
1712  Flags.clearRenameReg();
1713  return MBBI;
1714  }
1715 
1716  if (DebugCounter::shouldExecute(RegRenamingCounter)) {
1717  if (!MaybeCanRename)
1718  MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween,
1719  RequiredClasses, TRI)};
1720 
1721  if (*MaybeCanRename) {
1722  std::optional<MCPhysReg> MaybeRenameReg =
1723  tryToFindRegisterToRename(*FirstMI.getParent()->getParent(),
1724  Reg, DefinedInBB, UsedInBetween,
1725  RequiredClasses, TRI);
1726  if (MaybeRenameReg) {
1727  Flags.setRenameReg(*MaybeRenameReg);
1728  Flags.setMergeForward(true);
1729  MBBIWithRenameReg = MBBI;
1730  }
1731  }
1732  }
1733  }
1734  // Unable to combine these instructions due to interference in between.
1735  // Keep looking.
1736  }
1737  }
1738 
1739  if (Flags.getRenameReg())
1740  return MBBIWithRenameReg;
1741 
1742  // The instruction wasn't a matching load or store. Stop searching if we
1743  // encounter a call instruction that might modify memory.
1744  if (MI.isCall())
1745  return E;
1746 
1747  // Update modified / uses register units.
1748  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1749 
1750  // Otherwise, if the base register is modified, we have no match, so
1751  // return early.
1752  if (!ModifiedRegUnits.available(BaseReg))
1753  return E;
1754 
1755  // Update list of instructions that read/write memory.
1756  if (MI.mayLoadOrStore())
1757  MemInsns.push_back(&MI);
1758  }
1759  return E;
1760 }
1761 
1762 static MachineBasicBlock::iterator
1763 maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {
1764  auto End = MI.getParent()->end();
1765  if (MaybeCFI == End ||
1766  MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
1767  !(MI.getFlag(MachineInstr::FrameSetup) ||
1768  MI.getFlag(MachineInstr::FrameDestroy)) ||
1769  AArch64InstrInfo::getLdStBaseOp(MI).getReg() != AArch64::SP)
1770  return End;
1771 
1772  const MachineFunction &MF = *MI.getParent()->getParent();
1773  unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
1774  const MCCFIInstruction &CFI = MF.getFrameInstructions()[CFIIndex];
1775  switch (CFI.getOperation()) {
1776  case MCCFIInstruction::OpDefCfa:
1777  case MCCFIInstruction::OpDefCfaOffset:
1778  return MaybeCFI;
1779  default:
1780  return End;
1781  }
1782 }
1783 
1784 MachineBasicBlock::iterator
1785 AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
1786  MachineBasicBlock::iterator Update,
1787  bool IsPreIdx) {
1788  assert((Update->getOpcode() == AArch64::ADDXri ||
1789  Update->getOpcode() == AArch64::SUBXri) &&
1790  "Unexpected base register update instruction to merge!");
1791  MachineBasicBlock::iterator E = I->getParent()->end();
1792  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
1793 
1794  // If we are updating the SP and the following instruction is a CFA-offset-
1795  // related CFI instruction, move it after the merged instruction.
1796  MachineBasicBlock::iterator CFI =
1797  IsPreIdx ? maybeMoveCFI(*Update, next_nodbg(Update, E)) : E;
1798 
1799  // Return the instruction following the merged instruction, which is
1800  // the instruction following our unmerged load. Unless that's the add/sub
1801  // instruction we're merging, in which case it's the one after that.
1802  if (NextI == Update)
1803  NextI = next_nodbg(NextI, E);
1804 
1805  int Value = Update->getOperand(2).getImm();
1806  assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
1807  "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
1808  if (Update->getOpcode() == AArch64::SUBXri)
1809  Value = -Value;
1810 
1811  unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
1812  : getPostIndexedOpcode(I->getOpcode());
1813  MachineInstrBuilder MIB;
1814  int Scale, MinOffset, MaxOffset;
1815  getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
1816  if (!AArch64InstrInfo::isPairedLdSt(*I)) {
1817  // Non-paired instruction.
1818  MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
1819  .add(getLdStRegOp(*Update))
1820  .add(getLdStRegOp(*I))
1821  .add(AArch64InstrInfo::getLdStBaseOp(*I))
1822  .addImm(Value / Scale)
1823  .setMemRefs(I->memoperands())
1824  .setMIFlags(I->mergeFlagsWith(*Update));
1825  } else {
1826  // Paired instruction.
1827  MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
1828  .add(getLdStRegOp(*Update))
1829  .add(getLdStRegOp(*I, 0))
1830  .add(getLdStRegOp(*I, 1))
1831  .add(AArch64InstrInfo::getLdStBaseOp(*I))
1832  .addImm(Value / Scale)
1833  .setMemRefs(I->memoperands())
1834  .setMIFlags(I->mergeFlagsWith(*Update));
1835  }
1836  if (CFI != E) {
1837  MachineBasicBlock *MBB = I->getParent();
1838  MBB->splice(std::next(MIB.getInstr()->getIterator()), MBB, CFI);
1839  }
1840 
1841  if (IsPreIdx) {
1842  ++NumPreFolded;
1843  LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store.");
1844  } else {
1845  ++NumPostFolded;
1846  LLVM_DEBUG(dbgs() << "Creating post-indexed load/store.");
1847  }
1848  LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
1849  LLVM_DEBUG(I->print(dbgs()));
1850  LLVM_DEBUG(dbgs() << " ");
1851  LLVM_DEBUG(Update->print(dbgs()));
1852  LLVM_DEBUG(dbgs() << " with instruction:\n ");
1853  LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1854  LLVM_DEBUG(dbgs() << "\n");
1855 
1856  // Erase the old instructions for the block.
1857  I->eraseFromParent();
1858  Update->eraseFromParent();
1859 
1860  return NextI;
1861 }
1862 
1863 bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
1864  MachineInstr &MI,
1865  unsigned BaseReg, int Offset) {
1866  switch (MI.getOpcode()) {
1867  default:
1868  break;
1869  case AArch64::SUBXri:
1870  case AArch64::ADDXri:
1871  // Make sure it's a vanilla immediate operand, not a relocation or
1872  // anything else we can't handle.
1873  if (!MI.getOperand(2).isImm())
1874  break;
1875  // Watch out for 1 << 12 shifted value.
1876  if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
1877  break;
1878 
1879  // The update instruction source and destination register must be the
1880  // same as the load/store base register.
1881  if (MI.getOperand(0).getReg() != BaseReg ||
1882  MI.getOperand(1).getReg() != BaseReg)
1883  break;
1884 
1885  int UpdateOffset = MI.getOperand(2).getImm();
1886  if (MI.getOpcode() == AArch64::SUBXri)
1887  UpdateOffset = -UpdateOffset;
1888 
1889  // The immediate must be a multiple of the scaling factor of the pre/post
1890  // indexed instruction.
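// Illustrative example (not from the original source): for an ldp of two
// 64-bit registers (scale 8), "add x0, x0, #16" can fold into a writeback
// form (scaled offset 2), while "add x0, x0, #12" cannot, since 12 is not
// a multiple of 8.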
1891  int Scale, MinOffset, MaxOffset;
1892  getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
1893  if (UpdateOffset % Scale != 0)
1894  break;
1895 
1896  // Scaled offset must fit in the instruction immediate.
1897  int ScaledOffset = UpdateOffset / Scale;
1898  if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
1899  break;
1900 
1901  // If we have a non-zero Offset, we check that it matches the amount
1902  // we're adding to the register.
1903  if (!Offset || Offset == UpdateOffset)
1904  return true;
1905  break;
1906  }
1907  return false;
1908 }
1909 
1910 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
1911  MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
1912  MachineBasicBlock::iterator E = I->getParent()->end();
1913  MachineInstr &MemMI = *I;
1914  MachineBasicBlock::iterator MBBI = I;
1915 
1916  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
1917  int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() *
1918  TII->getMemScale(MemMI);
1919 
1920  // Scan forward looking for post-index opportunities. Updating instructions
1921  // can't be formed if the memory instruction doesn't have the offset we're
1922  // looking for.
1923  if (MIUnscaledOffset != UnscaledOffset)
1924  return E;
1925 
1926  // If the base register overlaps a source/destination register, we can't
1927  // merge the update. This does not apply to tag store instructions which
1928  // ignore the address part of the source register.
1929  // Nor does this apply to STGPi, which, unlike normal stores, has no
1930  // unpredictable behavior in this case and always performs the writeback
1931  // after reading the source register value.
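// Illustrative example (not from the original source): folding an update
// into "ldr x0, [x0]" would yield the post-indexed "ldr x0, [x0], #8",
// whose destination overlaps the writeback base register and is therefore
// unpredictable, so the overlap check below rejects it.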
1932  if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
1933  bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
1934  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
1935  Register DestReg = getLdStRegOp(MemMI, i).getReg();
1936  if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
1937  return E;
1938  }
1939  }
1940 
1941  // Track which register units have been modified and used between the first
1942  // insn (inclusive) and the second insn.
1943  ModifiedRegUnits.clear();
1944  UsedRegUnits.clear();
1945  MBBI = next_nodbg(MBBI, E);
1946 
1947  // We can't post-increment the stack pointer if any instruction between
1948  // the memory access (I) and the increment (MBBI) can access the memory
1949  // region defined by [SP, MBBI].
1950  const bool BaseRegSP = BaseReg == AArch64::SP;
1951  if (BaseRegSP && needsWinCFI(I->getMF())) {
1952  // FIXME: For now, we always block the optimization over SP on Windows
1953  // targets, as it requires adjusting the unwind/debug info; messing up
1954  // the unwind info can actually cause a miscompile.
1955  return E;
1956  }
1957 
1958  for (unsigned Count = 0; MBBI != E && Count < Limit;
1959  MBBI = next_nodbg(MBBI, E)) {
1960  MachineInstr &MI = *MBBI;
1961 
1962  // Don't count transient instructions towards the search limit since there
1963  // may be different numbers of them if e.g. debug information is present.
1964  if (!MI.isTransient())
1965  ++Count;
1966 
1967  // If we found a match, return it.
1968  if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
1969  return MBBI;
1970 
1971  // Update the status of what the instruction clobbered and used.
1972  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1973 
1974  // Otherwise, if the base register is used or modified, we have no match, so
1975  // return early.
1976  // If we are optimizing SP, do not allow instructions that may load or store
1977  // in between the load and the optimized value update.
1978  if (!ModifiedRegUnits.available(BaseReg) ||
1979  !UsedRegUnits.available(BaseReg) ||
1980  (BaseRegSP && MBBI->mayLoadOrStore()))
1981  return E;
1982  }
1983  return E;
1984 }
1985 
1986 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
1987  MachineBasicBlock::iterator I, unsigned Limit) {
1988  MachineBasicBlock::iterator B = I->getParent()->begin();
1989  MachineBasicBlock::iterator E = I->getParent()->end();
1990  MachineInstr &MemMI = *I;
1991  MachineBasicBlock::iterator MBBI = I;
1992  MachineFunction &MF = *MemMI.getMF();
1993 
1994  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
1995  int Offset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm();
1996 
1997  // If the load/store is the first instruction in the block, there's obviously
1998  // not any matching update. Ditto if the memory offset isn't zero.
1999  if (MBBI == B || Offset != 0)
2000  return E;
2001  // If the base register overlaps a destination register, we can't
2002  // merge the update.
2003  if (!isTagStore(MemMI)) {
2004  bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
2005  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
2006  Register DestReg = getLdStRegOp(MemMI, i).getReg();
2007  if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
2008  return E;
2009  }
2010  }
2011 
2012  const bool BaseRegSP = BaseReg == AArch64::SP;
2013  if (BaseRegSP && needsWinCFI(I->getMF())) {
2014  // FIXME: For now, we always block the optimization over SP on Windows
2015  // targets, as it requires adjusting the unwind/debug info; messing up
2016  // the unwind info can actually cause a miscompile.
2017  return E;
2018  }
2019 
2020  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
2021  unsigned RedZoneSize =
2022  Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());
2023 
2024  // Track which register units have been modified and used between the first
2025  // insn (inclusive) and the second insn.
2026  ModifiedRegUnits.clear();
2027  UsedRegUnits.clear();
2028  unsigned Count = 0;
2029  bool MemAccessBeforeSPPreInc = false;
2030  do {
2031  MBBI = prev_nodbg(MBBI, B);
2032  MachineInstr &MI = *MBBI;
2033 
2034  // Don't count transient instructions towards the search limit since there
2035  // may be different numbers of them if e.g. debug information is present.
2036  if (!MI.isTransient())
2037  ++Count;
2038 
2039  // If we found a match, return it.
2040  if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
2041  // Check that the update value is within our red zone limit (which may be
2042  // zero).
2043  if (MemAccessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)
2044  return E;
2045  return MBBI;
2046  }
2047 
2048  // Update the status of what the instruction clobbered and used.
2049  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2050 
2051  // Otherwise, if the base register is used or modified, we have no match, so
2052  // return early.
2053  if (!ModifiedRegUnits.available(BaseReg) ||
2054  !UsedRegUnits.available(BaseReg))
2055  return E;
2056  // Keep track of whether we have a memory access before an SP pre-increment;
2057  // in that case we need to validate later that the update amount respects
2058  // the red zone.
2059  if (BaseRegSP && MBBI->mayLoadOrStore())
2060  MemAccessBeforeSPPreInc = true;
2061  } while (MBBI != B && Count < Limit);
2062  return E;
2063 }
2064 
2065 bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
2066  MachineBasicBlock::iterator &MBBI) {
2067  MachineInstr &MI = *MBBI;
2068  // If this is a volatile load, don't mess with it.
2069  if (MI.hasOrderedMemoryRef())
2070  return false;
2071 
2072  if (needsWinCFI(MI.getMF()) && MI.getFlag(MachineInstr::FrameDestroy))
2073  return false;
2074 
2075  // Make sure this is a reg+imm.
2076  // FIXME: It is possible to extend it to handle reg+reg cases.
2077  if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
2078  return false;
2079 
2080  // Look backward up to LdStLimit instructions.
2081  MachineBasicBlock::iterator StoreI;
2082  if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
2083  ++NumLoadsFromStoresPromoted;
2084  // Promote the load. Keeping the iterator straight is a
2085  // pain, so we let the merge routine tell us what the next instruction
2086  // is after it's done mucking about.
2087  MBBI = promoteLoadFromStore(MBBI, StoreI);
2088  return true;
2089  }
2090  return false;
2091 }
2092 
2093 // Merge adjacent zero stores into a wider store.
2094 bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
2095  MachineBasicBlock::iterator &MBBI) {
2096  assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
2097  MachineInstr &MI = *MBBI;
2098  MachineBasicBlock::iterator E = MI.getParent()->end();
2099 
2100  if (!TII->isCandidateToMergeOrPair(MI))
2101  return false;
2102 
2103  // Look ahead up to LdStLimit instructions for a mergable instruction.
2104  LdStPairFlags Flags;
2105  MachineBasicBlock::iterator MergeMI =
2106  findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
2107  if (MergeMI != E) {
2108  ++NumZeroStoresPromoted;
2109 
2110  // Keeping the iterator straight is a pain, so we let the merge routine tell
2111  // us what the next instruction is after it's done mucking about.
2112  MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
2113  return true;
2114  }
2115  return false;
2116 }
2117 
2118 // Find loads and stores that can be merged into a single load or store pair
2119 // instruction.
2120 bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
2121  MachineInstr &MI = *MBBI;
2122  MachineBasicBlock::iterator E = MI.getParent()->end();
2123 
2124  if (!TII->isCandidateToMergeOrPair(MI))
2125  return false;
2126 
2127  // Early exit if the offset is not possible to match. (6 bits of positive
2128  // range, plus allow an extra one in case we find a later insn that matches
2129  // with Offset-1)
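// Illustrative example (not from the original source): a scaled 64-bit ldr
// at offset #64 is kept as a candidate here, since pairing with a neighbor
// at #63 would give the pair a minimum offset of #63, which still fits the
// 7-bit signed immediate.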
2130  bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
2131  int Offset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
2132  int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
2133  // Allow one more for offset.
2134  if (Offset > 0)
2135  Offset -= OffsetStride;
2136  if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
2137  return false;
2138 
2139  // Look ahead up to LdStLimit instructions for a pairable instruction.
2140  LdStPairFlags Flags;
2141  MachineBasicBlock::iterator Paired =
2142  findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
2143  if (Paired != E) {
2144  ++NumPairCreated;
2145  if (TII->hasUnscaledLdStOffset(MI))
2146  ++NumUnscaledPairCreated;
2147  // Keeping the iterator straight is a pain, so we let the merge routine tell
2148  // us what the next instruction is after it's done mucking about.
2149  auto Prev = std::prev(MBBI);
2150  MBBI = mergePairedInsns(MBBI, Paired, Flags);
2151  // Collect liveness info for instructions between Prev and the new position
2152  // MBBI.
2153  for (auto I = std::next(Prev); I != MBBI; I++)
2154  updateDefinedRegisters(*I, DefinedInBB, TRI);
2155 
2156  return true;
2157  }
2158  return false;
2159 }
2160 
2161 bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
2162  (MachineBasicBlock::iterator &MBBI) {
2163  MachineInstr &MI = *MBBI;
2164  MachineBasicBlock::iterator E = MI.getParent()->end();
2165  MachineBasicBlock::iterator Update;
2166 
2167  // Look forward to try to form a post-index instruction. For example,
2168  // ldr x0, [x20]
2169  // add x20, x20, #32
2170  // merged into:
2171  // ldr x0, [x20], #32
2172  Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
2173  if (Update != E) {
2174  // Merge the update into the ld/st.
2175  MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
2176  return true;
2177  }
2178 
2179  // Don't know how to handle unscaled pre/post-index versions below, so bail.
2180  if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
2181  return false;
2182 
2183  // Look back to try to find a pre-index instruction. For example,
2184  // add x0, x0, #8
2185  // ldr x1, [x0]
2186  // merged into:
2187  // ldr x1, [x0, #8]!
2188  Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
2189  if (Update != E) {
2190  // Merge the update into the ld/st.
2191  MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
2192  return true;
2193  }
2194 
2195  // The immediate in the load/store is scaled by the size of the memory
2196  // operation. The immediate in the add we're looking for,
2197  // however, is not, so adjust here.
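// Illustrative example (not from the original source): "ldr x1, [x0, #64]"
// encodes the immediate 8 with a memory scale of 8, so UnscaledOffset
// becomes 64, matching the byte amount in "add x0, x0, #64".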
2198  int UnscaledOffset =
2199  AArch64InstrInfo::getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);
2200 
2201  // Look forward to try to find a pre-index instruction. For example,
2202  // ldr x1, [x0, #64]
2203  // add x0, x0, #64
2204  // merged into:
2205  // ldr x1, [x0, #64]!
2206  Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
2207  if (Update != E) {
2208  // Merge the update into the ld/st.
2209  MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
2210  return true;
2211  }
2212 
2213  return false;
2214 }
2215 
2216 bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
2217  bool EnableNarrowZeroStOpt) {
2218 
2219  bool Modified = false;
2220  // Four transformations to do here:
2221  // 1) Find loads that directly read from stores and promote them by
2222  // replacing with mov instructions. If the store is wider than the load,
2223  // the load will be replaced with a bitfield extract.
2224  // e.g.,
2225  // str w1, [x0, #4]
2226  // ldrh w2, [x0, #6]
2227  // ; becomes
2228  // str w1, [x0, #4]
2229  // lsr w2, w1, #16
2230  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2231  MBBI != E;) {
2232  if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
2233  Modified = true;
2234  else
2235  ++MBBI;
2236  }
2237  // 2) Merge adjacent zero stores into a wider store.
2238  // e.g.,
2239  // strh wzr, [x0]
2240  // strh wzr, [x0, #2]
2241  // ; becomes
2242  // str wzr, [x0]
2243  // e.g.,
2244  // str wzr, [x0]
2245  // str wzr, [x0, #4]
2246  // ; becomes
2247  // str xzr, [x0]
2248  if (EnableNarrowZeroStOpt)
2249  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2250  MBBI != E;) {
2251  if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
2252  Modified = true;
2253  else
2254  ++MBBI;
2255  }
2256  // 3) Find loads and stores that can be merged into a single load or store
2257  // pair instruction.
2258  // e.g.,
2259  // ldr x0, [x2]
2260  // ldr x1, [x2, #8]
2261  // ; becomes
2262  // ldp x0, x1, [x2]
2263 
2264  if (MBB.getParent()->getRegInfo().tracksLiveness()) {
2265  DefinedInBB.clear();
2266  DefinedInBB.addLiveIns(MBB);
2267  }
2268 
2269  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2270  MBBI != E;) {
2271  // Track currently live registers up to this point, to help with
2272  // searching for a rename register on demand.
2273  updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
2274  if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
2275  Modified = true;
2276  else
2277  ++MBBI;
2278  }
2279  // 4) Find base register updates that can be merged into the load or store
2280  // as a base-reg writeback.
2281  // e.g.,
2282  // ldr x0, [x2]
2283  // add x2, x2, #4
2284  // ; becomes
2285  // ldr x0, [x2], #4
2286  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2287  MBBI != E;) {
2288  if (isMergeableLdStUpdate(*MBBI) && tryToMergeLdStUpdate(MBBI))
2289  Modified = true;
2290  else
2291  ++MBBI;
2292  }
2293 
2294  return Modified;
2295 }
2296 
2297 bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2298  if (skipFunction(Fn.getFunction()))
2299  return false;
2300 
2301  Subtarget = &Fn.getSubtarget<AArch64Subtarget>();
2302  TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
2303  TRI = Subtarget->getRegisterInfo();
2304  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2305 
2306  // Resize the modified and used register unit trackers. We do this once
2307  // per function and then clear the register units each time we optimize a load
2308  // or store.
2309  ModifiedRegUnits.init(*TRI);
2310  UsedRegUnits.init(*TRI);
2311  DefinedInBB.init(*TRI);
2312 
2313  bool Modified = false;
2314  bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
2315  for (auto &MBB : Fn) {
2316  auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
2317  Modified |= M;
2318  }
2319 
2320  return Modified;
2321 }
2322 
2323 // FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads and
2324 // stores near one another? Note: The pre-RA instruction scheduler already has
2325 // hooks to try and schedule pairable loads/stores together to improve pairing
2326 // opportunities. Thus, a pre-RA pairing pass may not be worth the effort.
2327 
2328 // FIXME: When pairing store instructions it's very possible for this pass to
2329 // hoist a store with a KILL marker above another use (without a KILL marker).
2330 // The resulting IR is invalid, but nothing uses the KILL markers after this
2331 // pass, so it's never caused a problem in practice.
2332 
2333 /// createAArch64LoadStoreOptimizationPass - returns an instance of the
2334 /// load / store optimization pass.
2335 FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
2336  return new AArch64LoadStoreOpt();
2337 }