#define DEBUG_TYPE "aarch64-ldst-opt"

STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
          "Number of load/store from unscaled generated");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
STATISTIC(NumFailedAlignmentCheck, "Number of load/store pair transformation "
                                   "not passed the alignment check");
STATISTIC(NumConstOffsetFolded,
          "Number of const offset of index address folded");

DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
              "Controls which pairs are considered for renaming");
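// High-level sketch of what this pass does (inferred from the code below):
// it runs after register allocation and performs load/store peephole
// optimizations, e.g. merging two adjacent loads or stores into one paired
// instruction:
//   ldr x0, [x2]
//   ldr x1, [x2, #8]
//   ; =>
//   ldp x0, x1, [x2]
// It also widens adjacent zero stores, promotes loads that read from a
// preceding store, and folds base-register updates and constant offsets
// into pre-/post-indexed and unsigned-offset addressing modes.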
 
#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"

using LdStPairFlags = struct LdStPairFlags {
  bool MergeForward = false;
  int SExtIdx = -1;
  std::optional<MCPhysReg> RenameReg;

  LdStPairFlags() = default;

  void setMergeForward(bool V = true) { MergeForward = V; }
  bool getMergeForward() const { return MergeForward; }

  void setSExtIdx(int V) { SExtIdx = V; }
  int getSExtIdx() const { return SExtIdx; }

  void setRenameReg(MCPhysReg R) { RenameReg = R; }
  void clearRenameReg() { RenameReg = std::nullopt; }
  std::optional<MCPhysReg> getRenameReg() const { return RenameReg; }
};
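// How these flags are used (inferred from their uses later in the file):
// MergeForward == true means the first instruction is removed and the second
// is replaced with the pair; false means the reverse. SExtIdx is the index of
// the paired-load result that must be sign-extended (-1 if none). RenameReg,
// when present, is a spare physical register used to rename an operand so an
// otherwise-blocked pairing becomes legal.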
 
// ... (inside the declaration of struct AArch64LoadStoreOpt; the member
// declarations below are reconstructed from their definitions later in this
// file)
  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
                                               LdStPairFlags &Flags,
                                               unsigned Limit,
                                               bool FindNarrowMerge);
  // ...
  MachineBasicBlock::iterator
  mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                        MachineBasicBlock::iterator MergeMI,
                        const LdStPairFlags &Flags);
  MachineBasicBlock::iterator
  mergePairedInsns(MachineBasicBlock::iterator I,
                   MachineBasicBlock::iterator Paired,
                   const LdStPairFlags &Flags);
  // ...
  MachineBasicBlock::iterator
  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
                                int UnscaledOffset, unsigned Limit);
  // ...
  bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
                            unsigned BaseReg, int Offset);
  bool isMatchingMovConstInsn(MachineInstr &MemMI, MachineInstr &MI,
                              unsigned IndexReg, unsigned &Offset);
  std::optional<MachineBasicBlock::iterator>
  mergeUpdateInsn(MachineBasicBlock::iterator I,
                  MachineBasicBlock::iterator Update, bool IsForward,
                  bool IsPreIdx, bool MergeEither);
// ...
 
char AArch64LoadStoreOpt::ID = 0;
 
static bool isNarrowStore(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
    return true;
  }
}
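// The narrow stores above are the byte and halfword forms (scaled and
// unscaled); they are the candidates for the zero-store widening performed
// by mergeNarrowZeroStores below.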
 
 
static bool isTagStore(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::STGi:
  case AArch64::STZGi:
  case AArch64::ST2Gi:
  case AArch64::STZ2Gi:
    return true;
  }
}
 
 
static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                         bool *IsValidLdStrOpc = nullptr) {
  if (IsValidLdStrOpc)
    *IsValidLdStrOpc = true;
  switch (Opc) {
  default:
    if (IsValidLdStrOpc)
      *IsValidLdStrOpc = false;
    return std::numeric_limits<unsigned>::max();
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRDpre:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRQpre:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
  case AArch64::STRWui:
  case AArch64::STRWpre:
  case AArch64::STURWi:
  case AArch64::STRXui:
  case AArch64::STRXpre:
  case AArch64::STURXi:
  case AArch64::STR_ZXI:
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRDpre:
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDRQpre:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::LDRWpre:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::LDRXpre:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::STRSpre:
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSpre:
  case AArch64::LDR_ZXI:
    return Opc;
  case AArch64::LDRSWui:
    return AArch64::LDRWui;
  case AArch64::LDURSWi:
    return AArch64::LDURWi;
  case AArch64::LDRSWpre:
    return AArch64::LDRWpre;
  }
}
 
 
static unsigned getMatchingWideOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no wide equivalent!");
  case AArch64::STRBBui:
    return AArch64::STRHHui;
  case AArch64::STRHHui:
    return AArch64::STRWui;
  case AArch64::STURBBi:
    return AArch64::STURHHi;
  case AArch64::STURHHi:
    return AArch64::STURWi;
  case AArch64::STURWi:
    return AArch64::STURXi;
  case AArch64::STRWui:
    return AArch64::STRXui;
  }
}
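// For example, the zero-store widening this table enables (two byte stores
// of wzr become a single halfword store of wzr):
//   strb wzr, [x0]
//   strb wzr, [x0, #1]
//   ; =>
//   strh wzr, [x0]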
 
 
static unsigned getMatchingPairOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pairwise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STPSi;
  case AArch64::STRSpre:
    return AArch64::STPSpre;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STPDi;
  case AArch64::STRDpre:
    return AArch64::STPDpre;
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STR_ZXI:
    return AArch64::STPQi;
  case AArch64::STRQpre:
    return AArch64::STPQpre;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STPWi;
  case AArch64::STRWpre:
    return AArch64::STPWpre;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STPXi;
  case AArch64::STRXpre:
    return AArch64::STPXpre;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDPSi;
  case AArch64::LDRSpre:
    return AArch64::LDPSpre;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDPDi;
  case AArch64::LDRDpre:
    return AArch64::LDPDpre;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDR_ZXI:
    return AArch64::LDPQi;
  case AArch64::LDRQpre:
    return AArch64::LDPQpre;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDPWi;
  case AArch64::LDRWpre:
    return AArch64::LDPWpre;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDPXi;
  case AArch64::LDRXpre:
    return AArch64::LDPXpre;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return AArch64::LDPSWi;
  case AArch64::LDRSWpre:
    return AArch64::LDPSWpre;
  }
}
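// Note that both the scaled (e.g. STRXui) and unscaled (STURXi) forms map to
// the same pair opcode: LDP/STP only exists with a scaled signed immediate,
// which is why the pairing code below has to normalize offsets before it can
// combine a scaled with an unscaled access.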
 
 
static unsigned isMatchingStore(MachineInstr &LoadInst,
                                MachineInstr &StoreInst) {
  // ...
  unsigned LdOpc = LoadInst.getOpcode();
  unsigned StOpc = StoreInst.getOpcode();
  switch (LdOpc) {
  default:
    llvm_unreachable("Unsupported load instruction!");
  case AArch64::LDRBBui:
    return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
           StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURBBi:
    return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
           StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRHHui:
    return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
           StOpc == AArch64::STRXui;
  case AArch64::LDURHHi:
    return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
           StOpc == AArch64::STURXi;
  case AArch64::LDRWui:
    return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURWi:
    return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRXui:
    return StOpc == AArch64::STRXui;
  case AArch64::LDURXi:
    return StOpc == AArch64::STURXi;
  }
}
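// A load can only be promoted from a store of equal or wider width that uses
// the same addressing style (scaled vs. unscaled); whether the loaded bytes
// actually lie within the stored bytes is checked separately, in
// isLdOffsetInRangeOfSt below.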
 
 
static unsigned getPreIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pre-indexed equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpre;
  case AArch64::STRDui:
    return AArch64::STRDpre;
  case AArch64::STRQui:
    return AArch64::STRQpre;
  case AArch64::STRBBui:
    return AArch64::STRBBpre;
  case AArch64::STRHHui:
    return AArch64::STRHHpre;
  case AArch64::STRWui:
    return AArch64::STRWpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  case AArch64::LDRSui:
    return AArch64::LDRSpre;
  case AArch64::LDRDui:
    return AArch64::LDRDpre;
  case AArch64::LDRQui:
    return AArch64::LDRQpre;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpre;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpre;
  case AArch64::LDRWui:
    return AArch64::LDRWpre;
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpre;
  case AArch64::LDPSi:
    return AArch64::LDPSpre;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpre;
  case AArch64::LDPDi:
    return AArch64::LDPDpre;
  case AArch64::LDPQi:
    return AArch64::LDPQpre;
  case AArch64::LDPWi:
    return AArch64::LDPWpre;
  case AArch64::LDPXi:
    return AArch64::LDPXpre;
  case AArch64::STPSi:
    return AArch64::STPSpre;
  case AArch64::STPDi:
    return AArch64::STPDpre;
  case AArch64::STPQi:
    return AArch64::STPQpre;
  case AArch64::STPWi:
    return AArch64::STPWpre;
  case AArch64::STPXi:
    return AArch64::STPXpre;
  case AArch64::STGi:
    return AArch64::STGPreIndex;
  case AArch64::STZGi:
    return AArch64::STZGPreIndex;
  case AArch64::ST2Gi:
    return AArch64::ST2GPreIndex;
  case AArch64::STZ2Gi:
    return AArch64::STZ2GPreIndex;
  case AArch64::STGPi:
    return AArch64::STGPpre;
  }
}
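// For example, the pre-index folding this table enables:
//   add x0, x0, #16
//   str x1, [x0]
//   ; =>
//   str x1, [x0, #16]!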
 
 
static unsigned getBaseAddressOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no base address equivalent!");
  case AArch64::LDRBroX:
    return AArch64::LDRBui;
  case AArch64::LDRBBroX:
    return AArch64::LDRBBui;
  case AArch64::LDRSBXroX:
    return AArch64::LDRSBXui;
  case AArch64::LDRSBWroX:
    return AArch64::LDRSBWui;
  case AArch64::LDRHroX:
    return AArch64::LDRHui;
  case AArch64::LDRHHroX:
    return AArch64::LDRHHui;
  case AArch64::LDRSHXroX:
    return AArch64::LDRSHXui;
  case AArch64::LDRSHWroX:
    return AArch64::LDRSHWui;
  case AArch64::LDRWroX:
    return AArch64::LDRWui;
  case AArch64::LDRSroX:
    return AArch64::LDRSui;
  case AArch64::LDRSWroX:
    return AArch64::LDRSWui;
  case AArch64::LDRDroX:
    return AArch64::LDRDui;
  case AArch64::LDRXroX:
    return AArch64::LDRXui;
  case AArch64::LDRQroX:
    return AArch64::LDRQui;
  }
}
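// For example, the constant-offset folding this table enables (together with
// isMatchingMovConstInsn / mergeConstOffsetInsn below), shown schematically
// with simplified registers; x8 is the index register that dies here:
//   mov  w8, #0x12340          ; built with movz/movk
//   ldr  x0, [x1, x8]
//   ; =>
//   add  x8, x1, #0x12, lsl #12
//   ldr  x0, [x8, #0x340]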
 
 
static unsigned getPostIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no post-indexed equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STRSpost;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STRDpost;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STRQpost;
  case AArch64::STRBBui:
    return AArch64::STRBBpost;
  case AArch64::STRHHui:
    return AArch64::STRHHpost;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STRWpost;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STRXpost;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDRSpost;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDRDpost;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDRQpost;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpost;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpost;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDRWpost;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDRXpost;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpost;
  case AArch64::LDPSi:
    return AArch64::LDPSpost;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpost;
  case AArch64::LDPDi:
    return AArch64::LDPDpost;
  case AArch64::LDPQi:
    return AArch64::LDPQpost;
  case AArch64::LDPWi:
    return AArch64::LDPWpost;
  case AArch64::LDPXi:
    return AArch64::LDPXpost;
  case AArch64::STPSi:
    return AArch64::STPSpost;
  case AArch64::STPDi:
    return AArch64::STPDpost;
  case AArch64::STPQi:
    return AArch64::STPQpost;
  case AArch64::STPWi:
    return AArch64::STPWpost;
  case AArch64::STPXi:
    return AArch64::STPXpost;
  case AArch64::STGi:
    return AArch64::STGPostIndex;
  case AArch64::STZGi:
    return AArch64::STZGPostIndex;
  case AArch64::ST2Gi:
    return AArch64::ST2GPostIndex;
  case AArch64::STZ2Gi:
    return AArch64::STZ2GPostIndex;
  case AArch64::STGPi:
    return AArch64::STGPpost;
  }
}
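// For example, the post-index folding this table enables:
//   ldr x0, [x1]
//   add x1, x1, #8
//   ; =>
//   ldr x0, [x1], #8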
 
 
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {
  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  switch (OpcA) {
  default:
    return false;
  case AArch64::STRSpre:
    return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
  case AArch64::STRDpre:
    return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
  case AArch64::STRQpre:
    return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
  case AArch64::STRWpre:
    return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
  case AArch64::STRXpre:
    return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
  case AArch64::LDRSpre:
    return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
  case AArch64::LDRDpre:
    return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
  case AArch64::LDRQpre:
    return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
  case AArch64::LDRWpre:
    return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
  case AArch64::LDRXpre:
    return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
  case AArch64::LDRSWpre:
    return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);
  }
}
 
 
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
                                       int &MinOffset, int &MaxOffset) {
  // ...
}

static MachineOperand &getLdStRegOp(MachineInstr &MI,
                                    unsigned PairedRegOp = 0) {
  assert(PairedRegOp < 2 && "Unexpected register operand idx.");
  // ...
  return MI.getOperand(Idx);
}
 
 
static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
                                  MachineInstr &StoreInst,
                                  const AArch64InstrInfo *TII) {
  // ... (LoadSize/StoreSize and the byte offsets of both accesses are
  // computed here)
  int UnscaledStOffset = /* ... */;
  int UnscaledLdOffset = /* ... */;
  return (UnscaledStOffset <= UnscaledLdOffset) &&
         (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
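// Worked example: an 8-byte store at byte offset 16 covers bytes [16, 24);
// a 2-byte load at byte offset 20 satisfies 16 <= 20 and 20 + 2 <= 24, so
// the loaded bytes lie entirely within the stored bytes.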
 
 
static bool isPromotableZeroStoreInst(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
          isNarrowStore(Opc)) &&
         getLdStRegOp(MI).getReg() == AArch64::WZR;
}
 
 
static bool isPromotableLoadFromStore(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  // Unscaled instructions.
  case AArch64::LDURBBi:
  case AArch64::LDURHHi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
    return true;
  }
}
 
 
static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::STRHHui:
  case AArch64::STRBBui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
  case AArch64::LDRHHui:
  case AArch64::LDRBBui:
  // ... (tag stores)
  case AArch64::STZ2Gi:
  // Unscaled instructions.
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  // Paired instructions.
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  // ... (remaining paired opcodes, then the reg+imm and offset-range checks)
  }
}
 
 
static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  case AArch64::LDRBroX:
  case AArch64::LDRBBroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSBWroX:
    Scale = 1;
    return true;
  case AArch64::LDRHroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSHWroX:
    Scale = 2;
    return true;
  case AArch64::LDRWroX:
  case AArch64::LDRSroX:
  case AArch64::LDRSWroX:
    Scale = 4;
    return true;
  case AArch64::LDRDroX:
  case AArch64::LDRXroX:
    Scale = 8;
    return true;
  case AArch64::LDRQroX:
    Scale = 16;
    return true;
  }
}

static bool isRewritableImplicitDef(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::ORRWrs:
  case AArch64::ADDWri:
    return true;
  }
}
 
 
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                                           MachineBasicBlock::iterator MergeMI,
                                           const LdStPairFlags &Flags) {
  assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
         "Expected promotable zero stores.");
  // ...
  if (NextI == MergeMI)
    NextI = next_nodbg(NextI, I->getParent()->end());

  unsigned Opc = I->getOpcode();
  unsigned MergeMIOpc = MergeMI->getOpcode();
  bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
  bool IsMergedMIScaled = !TII->hasUnscaledLdStOffset(MergeMIOpc);
  int OffsetStride = IsScaled ? TII->getMemScale(*I) : 1;
  int MergeMIOffsetStride = IsMergedMIScaled ? TII->getMemScale(*MergeMI) : 1;

  bool MergeForward = Flags.getMergeForward();
  // ...
  const MachineOperand &BaseRegOp =
      MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI)
                   : AArch64InstrInfo::getLdStBaseOp(*I);

  int64_t IOffsetInBytes = /* ... */;
  int64_t MIOffsetInBytes = /* ... */;
  int64_t OffsetImm;
  if (IOffsetInBytes > MIOffsetInBytes)
    OffsetImm = MIOffsetInBytes;
  else
    OffsetImm = IOffsetInBytes;

  // ...
  if (!TII->hasUnscaledLdStOffset(NewOpcode)) {
    int NewOffsetStride = TII->getMemScale(NewOpcode);
    assert(((OffsetImm % NewOffsetStride) == 0) &&
           "Offset should be a multiple of the store memory scale");
    OffsetImm = OffsetImm / NewOffsetStride;
  }

  // ...
  MachineInstrBuilder MIB;
  MIB = /* BuildMI(...) */
            .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
            // ...
            .setMIFlags(I->mergeFlagsWith(*MergeMI));
  // ...
  LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n    ");
  // ...
  I->eraseFromParent();
  MergeMI->eraseFromParent();
  return NextI;
}
 
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
                              const TargetRegisterInfo *TRI, unsigned Limit,
                              std::function<bool(MachineInstr &, bool)> &Fn) {
  auto MBB = MI.getParent();
  // ...
    bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
      return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
             TRI->regsOverlap(MOP.getReg(), DefReg);
    });
  // ...
}

static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
                                   const TargetRegisterInfo *TRI) {
  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && MOP.isKill())
      Units.removeReg(MOP.getReg());
  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && !MOP.isKill())
      Units.addReg(MOP.getReg());
}

static void addDebugSubstitutionsToTable(MachineFunction *MF,
                                         unsigned InstrNumToSet,
                                         MachineInstr &OriginalInstr,
                                         MachineInstr &MergedInstr) {
  // ...
  unsigned OperandNo = 0;
  bool RegFound = false;
  for (const auto Op : MergedInstr.operands()) {
    if (Op.getReg() == Reg) {
      // ...
    }
    // ...
  }
  // ...
  MF->makeDebugValueSubstitution(/* ... */, {InstrNumToSet, OperandNo});
}
 
 
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator Paired,
                                      const LdStPairFlags &Flags) {
  // ...
  if (NextI == Paired)
    NextI = next_nodbg(NextI, E);

  int SExtIdx = Flags.getSExtIdx();
  // ...
  bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
  int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;

  bool MergeForward = Flags.getMergeForward();

  std::optional<MCPhysReg> RenameReg = Flags.getRenameReg();
  if (RenameReg) {
    // ...
    DefinedInBB.addReg(*RenameReg);

    auto GetMatchingSubReg =
        [this, RenameReg](const TargetRegisterClass *C) -> MCPhysReg {
      for (MCPhysReg SubOrSuper :
           TRI->sub_and_superregs_inclusive(*RenameReg)) {
        if (C->contains(SubOrSuper))
          return SubOrSuper;
      }
      // ...
    };

    std::function<bool(MachineInstr &, bool)> UpdateMIs =
        [this, RegToRename, GetMatchingSubReg, MergeForward](MachineInstr &MI,
                                                             bool IsDef) {
          if (IsDef) {
            bool SeenDef = false;
            for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
              MachineOperand &MOP = MI.getOperand(OpIdx);
              if (/* ... */
                  (!MergeForward || !SeenDef ||
                   /* ... */) &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert(/* ... */ && "Need renamable operands");
                Register MatchingReg;
                if (const TargetRegisterClass *RC =
                        /* ... */)
                  MatchingReg = GetMatchingSubReg(RC);
                else
                  MatchingReg = GetMatchingSubReg(
                      TRI->getMinimalPhysRegClass(MOP.getReg()));
                // ...
              }
            }
          } else {
            for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
              MachineOperand &MOP = MI.getOperand(OpIdx);
              if (/* ... */
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert(/* ... */ && "Need renamable operands");
                Register MatchingReg;
                if (const TargetRegisterClass *RC =
                        /* ... */)
                  MatchingReg = GetMatchingSubReg(RC);
                else
                  MatchingReg = GetMatchingSubReg(
                      TRI->getMinimalPhysRegClass(MOP.getReg()));
                assert(MatchingReg != AArch64::NoRegister &&
                       "Cannot find matching regs for renaming");
                // ...
              }
            }
          }
          // ...
        };
    forAllMIsUntilDef(/* ... */, RegToRename,
                      TRI, UINT32_MAX, UpdateMIs);

    // ...
    if (MergeForward)
      RegToCheck = RegToRename;
    for (auto &MI : make_range(
             MergeForward ? std::next(I) : I,
             MergeForward ? std::next(Paired) : Paired))
      assert(all_of(MI.operands(),
                    [this, RegToCheck](const MachineOperand &MOP) {
                      return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
                             // ...
                             !TRI->regsOverlap(MOP.getReg(), RegToCheck);
                    }) &&
             "Rename register used between paired instruction, trashing the "
             "content");
  }

  // ...
  const MachineOperand &BaseRegOp =
      MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired)
                   : AArch64InstrInfo::getLdStBaseOp(*I);

  // ...
  bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
  if (IsUnscaled != PairedIsUnscaled) {
    // ...
    int MemSize = TII->getMemScale(*Paired);
    if (PairedIsUnscaled) {
      assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
             "Offset should be a multiple of the stride!");
      PairedOffset /= MemSize;
    } else {
      PairedOffset *= MemSize;
    }
  }

  // ...
  MachineInstr *RtMI, *Rt2MI;
  if (Offset == PairedOffset + OffsetStride &&
      /* ... */) {
    // ...
    if (SExtIdx != -1)
      SExtIdx = (SExtIdx + 1) % 2;
    // ...
  }
  // ...
  if (/* ... */) {
    assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
           "Unscaled offset cannot be scaled.");
    OffsetImm /= TII->getMemScale(*RtMI);
  }

  // ...
  MachineInstrBuilder MIB;
  // ...
  MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;
  // ...
  if (RegOp0.isUse()) {
    if (!MergeForward) {
      // ...
      for (auto It = std::next(I); It != Paired && PairedRegOp.isKill(); ++It)
        if (It->readsRegister(PairedRegOp.getReg(), TRI))
          PairedRegOp.setIsKill(false);
    } else {
      // ...
      for (MachineInstr &MI :
           make_range(std::next(I->getIterator()), Paired->getIterator()))
        MI.clearRegisterKills(Reg, TRI);
    }
  }
  // ...
  MIB = /* BuildMI(...) */
      // ...
      .setMIFlags(I->mergeFlagsWith(*Paired));
  // ...
  LLVM_DEBUG(
      dbgs() << "Creating pair load/store. Replacing instructions:\n    ");

  if (SExtIdx != -1) {
    // ...
    MachineOperand &DstMO = MIB->getOperand(SExtIdx);
    // ...
    Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
    // ...
    MachineInstrBuilder MIBKill =
        BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
            /* ... */;
    // ...
    MachineInstrBuilder MIBSXTW =
        BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
            /* ... */;
    // ...
    if (I->peekDebugInstrNum()) {
      // ...
      unsigned NewInstrNum;
      if (DstRegX == I->getOperand(0).getReg()) {
        // ...
      }
      // ...
    }
    if (Paired->peekDebugInstrNum()) {
      // ...
      unsigned NewInstrNum;
      if (DstRegX == Paired->getOperand(0).getReg()) {
        // ...
      }
      // ...
    }
  } else if (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) {
    // ...
    assert(AArch64::ZPRRegClass.contains(MOp0.getReg()) &&
           AArch64::ZPRRegClass.contains(MOp1.getReg()) && "Invalid register.");
    MOp0.setReg(AArch64::Q0 + (MOp0.getReg() - AArch64::Z0));
    MOp1.setReg(AArch64::Q0 + (MOp1.getReg() - AArch64::Z0));
    // ...
  } else {
    // ...
    if (I->peekDebugInstrNum()) {
      // ...
    }
    if (Paired->peekDebugInstrNum()) {
      // ...
    }
  }
  // ...
  I->eraseFromParent();
  Paired->eraseFromParent();
  return NextI;
}
 
MachineBasicBlock::iterator
AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                                          MachineBasicBlock::iterator StoreI) {
  MachineBasicBlock::iterator NextI =
      next_nodbg(LoadI, LoadI->getParent()->end());

  int LoadSize = TII->getMemScale(*LoadI);
  int StoreSize = TII->getMemScale(*StoreI);
  // ...
  bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
  assert((IsStoreXReg ||
          TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
         "Unexpected RegClass");

  MachineInstr *BitExtMI;
  if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
    // ...
    if (StRt == LdRt && LoadSize == 8) {
      for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                         LoadI->getIterator())) {
        if (MI.killsRegister(StRt, TRI)) {
          MI.clearRegisterKills(StRt, TRI);
          break;
        }
      }
      // ...
      LoadI->eraseFromParent();
      return NextI;
    }
    // ...
    BitExtMI =
        BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
            .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
            /* ... */;
  } else {
    // ...
    bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
    assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
           "Unsupported ld/st match");
    assert(LoadSize <= StoreSize && "Invalid load size");
    int UnscaledLdOffset = /* ... */;
    int UnscaledStOffset = /* ... */;
    int Width = LoadSize * 8;
    Register DestReg =
        IsStoreXReg ? Register(TRI->getMatchingSuperReg(
                          LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
                    : LdRt;
    assert((UnscaledLdOffset >= UnscaledStOffset &&
            (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
           /* ... */);

    int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
    int Imms = Immr + Width - 1;
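    // Worked example (following the arithmetic above): a 2-byte load at byte
    // offset 2 within an 8-byte store gives Immr = 8 * 2 = 16 and
    // Imms = 16 + 16 - 1 = 31, i.e. a bitfield extract of bits [31:16] of
    // the stored X register reproduces the loaded halfword.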
 
    if (UnscaledLdOffset == UnscaledStOffset) {
      uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12)
                                /* ... */;
      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
                  DestReg)
              .addReg(StRt)
              .addImm(AndMaskEncoded);
    } else if (IsStoreXReg && Imms == 31) {
      assert(Immr <= Imms && "Expected LSR alias of UBFM");
      BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                         TII->get(AArch64::UBFMWri),
                         TRI->getSubReg(DestReg, AArch64::sub_32))
                     .addReg(TRI->getSubReg(StRt, AArch64::sub_32))
                     .addImm(Immr)
                     .addImm(Imms);
    } else {
      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
                  DestReg)
              .addReg(StRt)
              .addImm(Immr)
              .addImm(Imms);
    }
  }

  // Clear kill flags between store and load.
  for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                     BitExtMI->getIterator()))
    if (MI.killsRegister(StRt, TRI)) {
      MI.clearRegisterKills(StRt, TRI);
      break;
    }
  // ...
  LoadI->eraseFromParent();
  return NextI;
}
 
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  // ...
  if (IsUnscaled) {
    if (Offset % OffsetStride)
      return false;
    Offset /= OffsetStride;
  }
  return Offset <= 63 && Offset >= -64;
}

static int alignTo(int Num, int PowOf2) {
  return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}
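// Worked example of the round-up: with Num = 5 and PowOf2 = 4,
// (5 + 3) & ~3 = 8 & ~3 = 8, while an already-aligned Num is unchanged:
// (8 + 3) & ~3 = 11 & ~3 = 8.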
 
 
bool AArch64LoadStoreOpt::findMatchingStore(
    MachineBasicBlock::iterator I, unsigned Limit,
    MachineBasicBlock::iterator &StoreI) {
  // ...
  MachineInstr &LoadMI = *I;
  // ...
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  // ... (scan backward up to Limit instructions)
    MachineInstr &MI = *MBBI;
    // ...
    if (!MI.isTransient())
      ++Count;
    // ...
    // If the base register is modified, we have no match, so return early.
    if (!ModifiedRegUnits.available(BaseReg))
      return false;
  // ...
}
 
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
                                       LdStPairFlags &Flags,
                                       const AArch64InstrInfo *TII) {
  // If this is a volatile load/store, or if pairing is suppressed, bail.
  if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
    return false;

  assert(!FirstMI.hasOrderedMemoryRef() &&
         !TII->isLdStPairSuppressed(FirstMI) &&
         "FirstMI shouldn't get here if either of these checks are true.");
  // ...
  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();
  // ...
  if (OpcA == AArch64::LDR_ZXI || OpcA == AArch64::STR_ZXI ||
      OpcB == AArch64::LDR_ZXI || OpcB == AArch64::STR_ZXI)
    return /* ... */;
  // ...
  bool IsValidLdStrOpc, PairIsValidLdStrOpc;
  unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
  assert(IsValidLdStrOpc &&
         "Given Opc should be a Load or Store with an immediate");
  if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc))
    Flags.setSExtIdx(NonSExtOpc == OpcA ? 1 : 0);
  // ...
  if (!PairIsValidLdStrOpc)
    return false;

  // Narrow stores pair only with other narrow stores of the same width.
  if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
    return OpcA == OpcB &&
           TII->getMemScale(FirstMI) == TII->getMemScale(MI);
  // ...
  // Try to match an unscaled load/store with a scaled load/store.
  return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) &&
         getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB);
}
 
static bool canRenameMOP(const MachineOperand &MOP,
                         const TargetRegisterInfo *TRI) {
  if (MOP.isReg()) {
    auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
    // ...
    if (RegClass->HasDisjunctSubRegs && RegClass->CoveredBySubRegs &&
        (TRI->getSubRegisterClass(RegClass, AArch64::dsub0) ||
         TRI->getSubRegisterClass(RegClass, AArch64::qsub0) ||
         TRI->getSubRegisterClass(RegClass, AArch64::zsub0))) {
      LLVM_DEBUG(
          dbgs()
          << "  Cannot rename operands with multiple disjunct subregisters ("
          << MOP << ")\n");
      return false;
    }
    // ...
      return TRI->isSuperOrSubRegisterEq(
          MOP.getParent()->getOperand(0).getReg(), MOP.getReg());
  }
  // ...
}

static bool canRenameUpToDef(MachineInstr &FirstMI,
                             LiveRegUnits &UsedInBetween,
                             SmallPtrSetImpl<const TargetRegisterClass *>
                                 &RequiredClasses,
                             const TargetRegisterInfo *TRI) {
  // ...
  if (any_of(FirstMI.operands(), [TRI, RegToRename](const MachineOperand &MOP) {
        return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
               MOP.isImplicit() && MOP.isKill() &&
               TRI->regsOverlap(RegToRename, MOP.getReg());
      }))
    return false;
  // ...
  bool FoundDef = false;
  // ... (walk forward until the definition of the register to rename)
      if (MI.isPseudo()) {
        LLVM_DEBUG(dbgs() << "  Cannot rename pseudo/bundle instruction\n");
        return false;
      }

      for (auto &MOP : MI.operands()) {
        if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
            !TRI->regsOverlap(MOP.getReg(), RegToRename))
          continue;
        // ...
      }
      // ...
      for (auto &MOP : MI.operands()) {
        if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
            !TRI->regsOverlap(MOP.getReg(), RegToRename))
          continue;
        // ...
      }
  // ...
  LLVM_DEBUG(dbgs() << "  Did not find definition for register in BB\n");
  return false;
}
 
 
static bool canRenameUntilSecondLoad(
    MachineInstr &FirstLoad, MachineInstr &SecondLoad,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  // ...
        LLVM_DEBUG(dbgs() << "Checking " << MI);

        if (MI.getFlag(MachineInstr::FrameSetup)) {
          LLVM_DEBUG(dbgs() << "  Cannot rename framesetup instructions "
                               /* ... */);
          return false;
        }

        for (auto &MOP : MI.operands()) {
          if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
              !TRI->regsOverlap(MOP.getReg(), RegToRename))
            continue;
          if (!canRenameMOP(MOP, TRI)) {
            LLVM_DEBUG(dbgs() << "  Cannot rename " << MOP << " in " << MI);
            return false;
          }
          // ...
        }
  // ...
}
 
 
static std::optional<MCPhysReg> tryToFindRegisterToRename(
    const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  // ...
  auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
    return any_of(TRI->sub_and_superregs_inclusive(PR),
                  [&MF, TRI](MCPhysReg SubOrSuper) {
                    return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
                  });
  };

  auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
    return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
      return any_of(
          TRI->sub_and_superregs_inclusive(PR),
          [C](MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); });
    });
  };

  auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
  for (const MCPhysReg &PR : *RegClass) {
    if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
        !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
        CanBeUsedForAllClasses(PR)) {
      // ...
      return {PR};
    }
  }
  LLVM_DEBUG(dbgs() << "No rename register found from "
                    << TRI->getRegClassName(RegClass) << "\n");
  return std::nullopt;
}
 
 
static std::optional<MCPhysReg> findRenameRegForSameLdStRegPair(
    std::optional<bool> MaybeCanRename, MachineInstr &FirstMI,
    MachineInstr &MI, Register Reg, LiveRegUnits &DefinedInBB,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  std::optional<MCPhysReg> RenameReg;
  // ...
  const bool IsLoad = FirstMI.mayLoad();

  if (!MaybeCanRename) {
    if (IsLoad)
      MaybeCanRename = {canRenameUntilSecondLoad(FirstMI, MI, UsedInBetween,
                                                 RequiredClasses, TRI)};
    else
      MaybeCanRename = {
          canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)};
  }

  if (*MaybeCanRename) {
    RenameReg = tryToFindRegisterToRename(*FirstMI.getMF(), Reg, DefinedInBB,
                                          UsedInBetween, RequiredClasses, TRI);
  }
  return RenameReg;
}
 
 
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
                                      LdStPairFlags &Flags, unsigned Limit,
                                      bool FindNarrowMerge) {
  // ...
  MachineInstr &FirstMI = *I;
  // ...
  bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
  // ...
  int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
  // ...
  std::optional<bool> MaybeCanRename;
  if (!EnableRenaming)
    MaybeCanRename = {false};

  SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
  LiveRegUnits UsedInBetween;
  // ...
  Flags.clearRenameReg();

  // Track register units modified and used between the two insns.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  // Remember any instructions that read/write memory between FirstMI and MI.
  SmallVector<MachineInstr *, 4> MemInsns;

  // ... (scan forward up to Limit instructions)
    MachineInstr &MI = *MBBI;
    // ...
    if (!MI.isTransient())
      ++Count;

    Flags.setSExtIdx(-1);
    if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) /* ... */) {
      assert(MI.mayLoadOrStore() && "Expected memory operation.");
      // ...
      bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
      if (IsUnscaled != MIIsUnscaled) {
        // Pairing a scaled with an unscaled access: normalize the offset.
        int MemSize = TII->getMemScale(MI);
        if (MIIsUnscaled) {
          // If the unscaled offset isn't a multiple of MemSize, we can't
          // pair the operations together.
          if (MIOffset % MemSize) {
            // ...
            continue;
          }
          MIOffset /= MemSize;
        } else {
          MIOffset *= MemSize;
        }
      }
      // ...
      if (BaseReg == MIBaseReg) {
        // ... (check for a potential pre-indexed pair candidate)
          bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
          bool IsBaseRegUsed = !UsedRegUnits.available(
              /* ... */);
          bool IsBaseRegModified = !ModifiedRegUnits.available(
              /* ... */);
          bool IsMIRegTheSame =
              /* ... */;
          if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
              IsMIRegTheSame) {
            // ...
          }
        // ...
        if ((Offset != MIOffset + OffsetStride) &&
            (Offset + OffsetStride != MIOffset)) {
          // ...
          continue;
        }
        // ...
        if (FindNarrowMerge) {
          if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
              /* ... */) {
            LLVM_DEBUG(/* ... */
                       << "keep looking.\n");
            continue;
          }
        } else {
          // ...
          if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
            LLVM_DEBUG(/* ... */
                       << "Offset doesn't fit due to alignment requirements, "
                       << "keep looking.\n");
            continue;
          }
        }
        // ...
        if (!ModifiedRegUnits.available(BaseReg))
          return E;

        const bool SameLoadReg = MayLoad && TRI->isSuperOrSubRegisterEq(
                                                /* ... */);
        // ...
        bool RtNotModified =
            /* ... */;
        bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg &&
                           /* ... */);

        LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n"
                          /* ... */
                          << (RtNotModified ? "true" : "false") << "\n"
                          /* ... */
                          << (RtNotUsed ? "true" : "false") << "\n");

        if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) {
          // ...
            std::optional<MCPhysReg> RenameReg =
                findRenameRegForSameLdStRegPair(MaybeCanRename, FirstMI, MI,
                                                Reg, DefinedInBB, UsedInBetween,
                                                RequiredClasses, TRI);
            if (!RenameReg) {
              // ...
              LLVM_DEBUG(/* ... */ << "keep looking.\n");
              continue;
            }
            Flags.setRenameReg(*RenameReg);
          // ...
          Flags.setMergeForward(false);
          if (/* ... */)
            Flags.clearRenameReg();
          return MBBI;
        }
        // ...
        LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n"
                          /* ... */
                          << "' not modified: "
                          << (RtNotModified ? "true" : "false") << "\n");

        if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) {
          if (/* ... */) {
            Flags.setMergeForward(true);
            Flags.clearRenameReg();
            return MBBI;
          }
          std::optional<MCPhysReg> RenameReg = findRenameRegForSameLdStRegPair(
              MaybeCanRename, FirstMI, MI, Reg, DefinedInBB, UsedInBetween,
              RequiredClasses, TRI);
          if (RenameReg) {
            Flags.setMergeForward(true);
            Flags.setRenameReg(*RenameReg);
            MBBIWithRenameReg = MBBI;
          }
        }
        LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to "
                          << "interference in between, keep looking.\n");
        // ...
      }
    }

    if (Flags.getRenameReg())
      return MBBIWithRenameReg;
    // ...
    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (!ModifiedRegUnits.available(BaseReg)) {
      return E;
    }

    // Update list of instructions that read/write memory.
    if (MI.mayLoadOrStore())
      MemInsns.push_back(&MI);
  // ...
  return E;
}
 
static MachineBasicBlock::iterator
maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {
  assert((MI.getOpcode() == AArch64::SUBXri ||
          MI.getOpcode() == AArch64::ADDXri) &&
         "Expected a register update instruction");
  auto End = MI.getParent()->end();
  if (MaybeCFI == End ||
      MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
      // ...
      MI.getOperand(0).getReg() != AArch64::SP)
    return End;
  // ...
  unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
  // ...
}
 
 
std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(
    MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update,
    bool IsForward, bool IsPreIdx, bool MergeEither) {
  assert((Update->getOpcode() == AArch64::ADDXri ||
          Update->getOpcode() == AArch64::SUBXri) &&
         "Unexpected base register update instruction to merge!");
  // ...
        if (std::any_of(std::next(CFI), I, [](const auto &Insn) {
              return Insn.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
            }))
          return std::nullopt;
        // ...
        MachineBasicBlock *MBB = InsertPt->getParent();
  // ...
  if (NextI == Update)
    NextI = next_nodbg(NextI, E);

  int Value = Update->getOperand(2).getImm();
  assert(/* ... */ &&
         "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
  if (Update->getOpcode() == AArch64::SUBXri)
    Value = -Value;

  // ...
  MachineInstrBuilder MIB;
  int Scale, MinOffset, MaxOffset;
  getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
  if (/* non-paired instruction ... */) {
    MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
                  TII->get(NewOpc))
              .add(Update->getOperand(0))
              /* ... */;
  } else {
    // Paired instruction.
    MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
                  TII->get(NewOpc))
              .add(Update->getOperand(0))
              /* ... */;
  }
  // ...
  I->eraseFromParent();
  Update->eraseFromParent();

  return NextI;
}
 
MachineBasicBlock::iterator AArch64LoadStoreOpt::mergeConstOffsetInsn(
    MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update,
    unsigned Offset, int Scale) {
  assert((Update->getOpcode() == AArch64::MOVKWi) &&
         "Unexpected const mov instruction to merge!");
  // ...
  MachineInstr &MemMI = *I;
  unsigned Mask = (1 << 12) * Scale - 1;
  // ...
  MachineInstrBuilder AddMIB, MemMIB;

  // ADDXri for the high bits of the folded constant offset.
  AddMIB =
      BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri))
          /* ... */;
  // ...
  MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
               /* ... */;
  ++NumConstOffsetFolded;
  // ...
  I->eraseFromParent();
  PrevI->eraseFromParent();
  Update->eraseFromParent();

  return NextI;
}
 
bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
                                               MachineInstr &MI,
                                               unsigned BaseReg, int Offset) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBXri:
  case AArch64::ADDXri:
    // Make sure it's a vanilla immediate operand, not a relocation.
    if (!MI.getOperand(2).isImm())
      break;
    // ...
    // The update instruction's source and destination register must both be
    // the load/store base register.
    if (MI.getOperand(0).getReg() != BaseReg ||
        MI.getOperand(1).getReg() != BaseReg)
      break;

    int UpdateOffset = MI.getOperand(2).getImm();
    if (MI.getOpcode() == AArch64::SUBXri)
      UpdateOffset = -UpdateOffset;
    // ...
    int Scale, MinOffset, MaxOffset;
    getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
    if (UpdateOffset % Scale != 0)
      break;

    int ScaledOffset = UpdateOffset / Scale;
    if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
      break;
    // ...
  }
  return false;
}
 
bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
                                                 MachineInstr &MI,
                                                 unsigned IndexReg,
                                                 unsigned &Offset) {
  // ...
  if (MI.getOpcode() == AArch64::MOVKWi &&
      TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) {
    // ...
    MachineInstr &MovzMI = *MBBI;
    // ...
    if (MovzMI.getOpcode() == AArch64::MOVZWi &&
        /* ... */) {
      // ...
      unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm();
      // ...
      return Offset >> 24 == 0;
    }
  }
  return false;
}
 
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
    MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
  MachineInstr &MemMI = *I;
  // ...
  int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() *
                         TII->getMemScale(MemMI);
  // ...
  if (MIUnscaledOffset != UnscaledOffset)
    return E;

  // If the base register overlaps a source/destination register, we can't
  // merge the update.
  // ...
    for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
      Register DestReg = getLdStRegOp(MemMI, i).getReg();
      if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
        return E;
    }

  // Track register units modified and used between the two insns.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  // ...
  const bool BaseRegSP = BaseReg == AArch64::SP;
  // ...
  MachineBasicBlock *CurMBB = I->getParent();
  // ... (scan forward, possibly into a unique successor block)
      MachineInstr &MI = *MBBI;
      // ...
      if (!MI.isTransient())
        ++Count;

      // If we found a match, return it.
      if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
        return MBBI;
      // ...
      // Otherwise, if the base register is used or modified, we have no
      // match, so return early. If optimizing SP, do not allow memory
      // accesses in between.
      if (!ModifiedRegUnits.available(BaseReg) ||
          /* ... */ ||
          (BaseRegSP && MBBI->mayLoadOrStore()))
        return E;
    // ...
    if (!VisitSucc || Limit <= Count)
      break;

    // Pick a single successor in which the base register is live-in.
    MachineBasicBlock *SuccToVisit = nullptr;
    unsigned LiveSuccCount = 0;
    for (MachineBasicBlock *Succ : CurMBB->successors()) {
      for (MCRegAliasIterator AI(BaseReg, TRI, true); AI.isValid(); ++AI) {
        if (Succ->isLiveIn(*AI)) {
          if (LiveSuccCount++)
            return E;
          if (Succ->pred_size() == 1)
            SuccToVisit = Succ;
          // ...
        }
      }
    }
    // ...
    CurMBB = SuccToVisit;
  // ...
  return E;
}
 
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
    MachineBasicBlock::iterator I, unsigned Limit, bool &MergeEither) {
  MachineInstr &MemMI = *I;
  // ...
  MachineFunction &MF = *MemMI.getMF();
  // ...
  Register DestReg[] = {getLdStRegOp(MemMI, 0).getReg(),
                        /* ... */
                                     : AArch64::NoRegister};
  // ...
  // If the base register overlaps a destination register, we can't merge the
  // update.
    for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i)
      if (DestReg[i] == BaseReg || TRI->isSubRegister(BaseReg, DestReg[i]))
        return E;

  const bool BaseRegSP = BaseReg == AArch64::SP;
  // ...
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  unsigned RedZoneSize =
      Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());

  // Track register units modified and used between the two insns.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  // ...
  bool MemAccessBeforeSPPreInc = false;
  // ... (scan backward up to Limit instructions)
    MachineInstr &MI = *MBBI;
    // ...
    if (!MI.isTransient())
      ++Count;

    // If we found a match, return it.
    if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
      // Check that the update value is within our red zone limit (which may
      // be zero).
      if (MemAccessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)
        return E;
      return MBBI;
    }
    // ...
    // Otherwise, if the base register is used or modified, we have no match,
    // so return early.
    if (!ModifiedRegUnits.available(BaseReg) ||
        /* ... */)
      return E;

    // A store or a destructive use of a destination register in between
    // prevents moving the update to either side.
    if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() ||
        (DestReg[0] != AArch64::NoRegister &&
         !(ModifiedRegUnits.available(DestReg[0]) &&
           /* ... */)) ||
        (DestReg[1] != AArch64::NoRegister &&
         !(ModifiedRegUnits.available(DestReg[1]) &&
           /* ... */)))
      MergeEither = false;

    // Remember memory accesses before an SP pre-increment: the update amount
    // must then respect the red zone.
    if (BaseRegSP && MBBI->mayLoadOrStore())
      MemAccessBeforeSPPreInc = true;
  // ...
  return E;
}
 
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
    MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) {
  MachineInstr &MemMI = *I;
  // ...
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  // ... (scan backward up to Limit instructions)
    MachineInstr &MI = *MBBI;
    // ...
    if (!MI.isTransient())
      ++Count;

    // If we found a match, return it.
    if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) {
      return MBBI;
    }
    // ...
    // Otherwise, if the index register is used or modified, we have no
    // match, so return early.
    if (!ModifiedRegUnits.available(IndexReg) ||
        /* ... */)
      return E;
  // ...
  return E;
}
 
bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
    MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  // If this is a volatile load, don't mess with it.
  if (MI.hasOrderedMemoryRef())
    return false;
  // ...
  // Look backward for a store that writes the address this load reads from.
  MachineBasicBlock::iterator StoreI;
  if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
    ++NumLoadsFromStoresPromoted;
    // Promote the load; the merge routine reports the next instruction.
    MBBI = promoteLoadFromStore(MBBI, StoreI);
    return true;
  }
  return false;
}
 
bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
    MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  // ...
  if (!TII->isCandidateToMergeOrPair(MI))
    return false;
  // ...
  LdStPairFlags Flags;
  MachineBasicBlock::iterator MergeMI =
      findMatchingInsn(MBBI, Flags, LdStLimit, /*FindNarrowMerge=*/true);
  if (MergeMI != E) {
    ++NumZeroStoresPromoted;
    // Merge the stores; the merge routine reports the next instruction.
    MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
    return true;
  }
  return false;
}
 
bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  // ...
  if (!TII->isCandidateToMergeOrPair(MI))
    return false;

  // If the disable-ldp feature is opted, do not emit ldp.
  if (MI.mayLoad() && Subtarget->hasDisableLdp())
    return false;

  // If the disable-stp feature is opted, do not emit stp.
  if (MI.mayStore() && Subtarget->hasDisableStp())
    return false;

  // Early exit if the offset cannot possibly match a paired form.
  bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
  // ...
  int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
  if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
    return false;

  // Look ahead for a pairable instruction.
  LdStPairFlags Flags;
  MachineBasicBlock::iterator Paired =
      findMatchingInsn(MBBI, Flags, LdStLimit, /*FindNarrowMerge=*/false);
  if (Paired != E) {
    // Keep a copy of the iterator before the merge moves it.
    auto Prev = std::prev(MBBI);

    // Fetch the memoperand of the load/store that is a candidate for
    // combination.
    MachineMemOperand *MemOp =
        MI.memoperands_empty() ? nullptr : MI.memoperands().front();

    // If the ldp/stp-aligned-only feature is opted, check that the source
    // pointer is aligned to at least double the alignment of the type.
    if ((MI.mayLoad() && Subtarget->hasLdpAlignedOnly()) ||
        (MI.mayStore() && Subtarget->hasStpAlignedOnly())) {
      // If there is no size/align information, can't perform the check.
      if (!MemOp || !MemOp->getMemoryType().isValid()) {
        NumFailedAlignmentCheck++;
        return false;
      }

      uint64_t MemAlignment = MemOp->getAlign().value();
      uint64_t TypeAlignment =
          Align(MemOp->getSize().getValue().getKnownMinValue()).value();

      if (MemAlignment < 2 * TypeAlignment) {
        NumFailedAlignmentCheck++;
        return false;
      }
    }

    ++NumPairCreated;
    if (TII->hasUnscaledLdStOffset(MI))
      ++NumUnscaledPairCreated;

    // Merge the pair; the merge routine reports the next instruction.
    MBBI = mergePairedInsns(MBBI, Paired, Flags);
    // Collect liveness info for instructions between Prev and the new
    // position MBBI.
    for (auto I = std::next(Prev); I != MBBI; I++)
      updateDefinedRegisters(*I, DefinedInBB, TRI);

    return true;
  }
  return false;
}
 
bool AArch64LoadStoreOpt::tryToMergeLdStUpdate(
    MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  // ...
  // Look forward for a post-index opportunity:
  //   ldr x0, [x20]
  //   add x20, x20, #32
  //   ; => ldr x0, [x20], #32
  Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
  if (Update != E) {
    if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
                                     /*IsPreIdx=*/false,
                                     /*MergeEither=*/false)) {
      MBBI = *NextI;
      return true;
    }
  }

  // Unscaled pre/post-index variants are not handled below, so bail.
  if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
    return false;

  // Look backward for a pre-index opportunity:
  //   add x0, x0, #8
  //   ldr x1, [x0]
  //   ; => ldr x1, [x0, #8]!
  bool MergeEither;
  Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit, MergeEither);
  if (Update != E) {
    if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/true,
                                     /*IsPreIdx=*/true, MergeEither)) {
      MBBI = *NextI;
      return true;
    }
  }

  // The immediate in the load/store is scaled by the memory size; the
  // immediate in the add is not, so adjust here.
  int UnscaledOffset =
      AArch64InstrInfo::getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);

  // Look forward for a pre-index opportunity:
  //   ldr x1, [x0, #64]
  //   add x0, x0, #64
  //   ; => ldr x1, [x0, #64]!
  Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
  if (Update != E) {
    if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
                                     /*IsPreIdx=*/true,
                                     /*MergeEither=*/false)) {
      MBBI = *NextI;
      return true;
    }
  }

  return false;
}

bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(
    MachineBasicBlock::iterator &MBBI, int Scale) {
  MachineInstr &MI = *MBBI;
  // ...
  // Unscaled pre/post-index variants are not handled below, so bail.
  if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
    return false;

  // Look backward for a constant offset that can be folded into the
  // register-offset load/store.
  unsigned Offset;
  Update = findMatchingConstOffsetBackward(MBBI, LdStConstLimit, Offset);
  if (Update != E && (Offset & (Scale - 1)) == 0) {
    // Merge the imm12 into the ld/st.
    MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale);
    return true;
  }

  return false;
}
 
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
                                        bool EnableNarrowZeroStOpt) {
  // ...
  if (EnableNarrowZeroStOpt)
    // ... (merge adjacent zero stores)
  // ...
    DefinedInBB.clear();
    DefinedInBB.addLiveIns(MBB);
  // ...
    if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
      Modified = true;
  // ...
}
 
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  // ...
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  // ...
  bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
  for (auto &MBB : Fn) {
    // ... (run optimizeBlock over every block)
  }
  // ...
}

FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
  return new AArch64LoadStoreOpt();
}
 
 