55#define DEBUG_TYPE "aarch64-ldst-opt"
57STATISTIC(NumPairCreated,
"Number of load/store pair instructions generated");
58STATISTIC(NumPostFolded,
"Number of post-index updates folded");
59STATISTIC(NumPreFolded,
"Number of pre-index updates folded");
61 "Number of load/store from unscaled generated");
62STATISTIC(NumZeroStoresPromoted,
"Number of narrow zero stores promoted");
63STATISTIC(NumLoadsFromStoresPromoted,
"Number of loads from stores promoted");
64STATISTIC(NumFailedAlignmentCheck,
"Number of load/store pair transformation "
65 "not passed the alignment check");
67 "Number of const offset of index address folded");
69 "Number of UMOV + GPR stores folded to FPR stores");
72 "Controls which pairs are considered for renaming");
97#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
101using LdStPairFlags =
struct LdStPairFlags {
105 bool MergeForward =
false;
116 std::optional<MCPhysReg> RenameReg;
118 LdStPairFlags() =
default;
120 void setMergeForward(
bool V =
true) { MergeForward = V; }
121 bool getMergeForward()
const {
return MergeForward; }
123 void setSExtIdx(
int V) { SExtIdx = V; }
124 int getSExtIdx()
const {
return SExtIdx; }
126 void setRenameReg(
MCPhysReg R) { RenameReg = R; }
127 void clearRenameReg() { RenameReg = std::nullopt; }
128 std::optional<MCPhysReg> getRenameReg()
const {
return RenameReg; }
131struct AArch64LoadStoreOpt {
145 LdStPairFlags &Flags,
147 bool FindNarrowMerge);
158 const LdStPairFlags &Flags);
164 const LdStPairFlags &Flags);
176 int UnscaledOffset,
unsigned Limit);
198 unsigned BaseReg,
int Offset);
201 unsigned IndexReg,
unsigned &
Offset);
204 std::optional<MachineBasicBlock::iterator>
207 bool IsPreIdx,
bool MergeEither);
256char AArch64LoadStoreOptLegacy::ID = 0;
263static
bool isNarrowStore(
unsigned Opc) {
267 case AArch64::STRBBui:
268 case AArch64::STURBBi:
269 case AArch64::STRHHui:
270 case AArch64::STURHHi:
278 switch (
MI.getOpcode()) {
284 case AArch64::STZ2Gi:
290 bool *IsValidLdStrOpc =
nullptr) {
292 *IsValidLdStrOpc =
true;
296 *IsValidLdStrOpc =
false;
297 return std::numeric_limits<unsigned>::max();
298 case AArch64::STRDui:
299 case AArch64::STURDi:
300 case AArch64::STRDpre:
301 case AArch64::STRQui:
302 case AArch64::STURQi:
303 case AArch64::STRQpre:
304 case AArch64::STRBBui:
305 case AArch64::STURBBi:
306 case AArch64::STRHHui:
307 case AArch64::STURHHi:
308 case AArch64::STRWui:
309 case AArch64::STRWpre:
310 case AArch64::STURWi:
311 case AArch64::STRXui:
312 case AArch64::STRXpre:
313 case AArch64::STURXi:
314 case AArch64::STR_ZXI:
315 case AArch64::LDRDui:
316 case AArch64::LDURDi:
317 case AArch64::LDRDpre:
318 case AArch64::LDRQui:
319 case AArch64::LDURQi:
320 case AArch64::LDRQpre:
321 case AArch64::LDRWui:
322 case AArch64::LDURWi:
323 case AArch64::LDRWpre:
324 case AArch64::LDRXui:
325 case AArch64::LDURXi:
326 case AArch64::LDRXpre:
327 case AArch64::STRSui:
328 case AArch64::STURSi:
329 case AArch64::STRSpre:
330 case AArch64::LDRSui:
331 case AArch64::LDURSi:
332 case AArch64::LDRSpre:
333 case AArch64::LDR_ZXI:
335 case AArch64::LDRSWui:
336 return AArch64::LDRWui;
337 case AArch64::LDURSWi:
338 return AArch64::LDURWi;
339 case AArch64::LDRSWpre:
340 return AArch64::LDRWpre;
348 case AArch64::STRBBui:
349 return AArch64::STRHHui;
350 case AArch64::STRHHui:
351 return AArch64::STRWui;
352 case AArch64::STURBBi:
353 return AArch64::STURHHi;
354 case AArch64::STURHHi:
355 return AArch64::STURWi;
356 case AArch64::STURWi:
357 return AArch64::STURXi;
358 case AArch64::STRWui:
359 return AArch64::STRXui;
367 case AArch64::STRSui:
368 case AArch64::STURSi:
369 return AArch64::STPSi;
370 case AArch64::STRSpre:
371 return AArch64::STPSpre;
372 case AArch64::STRDui:
373 case AArch64::STURDi:
374 return AArch64::STPDi;
375 case AArch64::STRDpre:
376 return AArch64::STPDpre;
377 case AArch64::STRQui:
378 case AArch64::STURQi:
379 case AArch64::STR_ZXI:
380 return AArch64::STPQi;
381 case AArch64::STRQpre:
382 return AArch64::STPQpre;
383 case AArch64::STRWui:
384 case AArch64::STURWi:
385 return AArch64::STPWi;
386 case AArch64::STRWpre:
387 return AArch64::STPWpre;
388 case AArch64::STRXui:
389 case AArch64::STURXi:
390 return AArch64::STPXi;
391 case AArch64::STRXpre:
392 return AArch64::STPXpre;
393 case AArch64::LDRSui:
394 case AArch64::LDURSi:
395 return AArch64::LDPSi;
396 case AArch64::LDRSpre:
397 return AArch64::LDPSpre;
398 case AArch64::LDRDui:
399 case AArch64::LDURDi:
400 return AArch64::LDPDi;
401 case AArch64::LDRDpre:
402 return AArch64::LDPDpre;
403 case AArch64::LDRQui:
404 case AArch64::LDURQi:
405 case AArch64::LDR_ZXI:
406 return AArch64::LDPQi;
407 case AArch64::LDRQpre:
408 return AArch64::LDPQpre;
409 case AArch64::LDRWui:
410 case AArch64::LDURWi:
411 return AArch64::LDPWi;
412 case AArch64::LDRWpre:
413 return AArch64::LDPWpre;
414 case AArch64::LDRXui:
415 case AArch64::LDURXi:
416 return AArch64::LDPXi;
417 case AArch64::LDRXpre:
418 return AArch64::LDPXpre;
419 case AArch64::LDRSWui:
420 case AArch64::LDURSWi:
421 return AArch64::LDPSWi;
422 case AArch64::LDRSWpre:
423 return AArch64::LDPSWpre;
434 case AArch64::LDRBBui:
435 return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
436 StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
437 case AArch64::LDURBBi:
438 return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
439 StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
440 case AArch64::LDRHHui:
441 return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
442 StOpc == AArch64::STRXui;
443 case AArch64::LDURHHi:
444 return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
445 StOpc == AArch64::STURXi;
446 case AArch64::LDRWui:
447 return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
448 case AArch64::LDURWi:
449 return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
450 case AArch64::LDRXui:
451 return StOpc == AArch64::STRXui;
452 case AArch64::LDURXi:
453 return StOpc == AArch64::STURXi;
465 case AArch64::STRBui:
466 return AArch64::STRBpre;
467 case AArch64::STRHui:
468 return AArch64::STRHpre;
469 case AArch64::STRSui:
470 return AArch64::STRSpre;
471 case AArch64::STRDui:
472 return AArch64::STRDpre;
473 case AArch64::STRQui:
474 return AArch64::STRQpre;
475 case AArch64::STRBBui:
476 return AArch64::STRBBpre;
477 case AArch64::STRHHui:
478 return AArch64::STRHHpre;
479 case AArch64::STRWui:
480 return AArch64::STRWpre;
481 case AArch64::STRXui:
482 return AArch64::STRXpre;
483 case AArch64::LDRBui:
484 return AArch64::LDRBpre;
485 case AArch64::LDRHui:
486 return AArch64::LDRHpre;
487 case AArch64::LDRSui:
488 return AArch64::LDRSpre;
489 case AArch64::LDRDui:
490 return AArch64::LDRDpre;
491 case AArch64::LDRQui:
492 return AArch64::LDRQpre;
493 case AArch64::LDRBBui:
494 return AArch64::LDRBBpre;
495 case AArch64::LDRHHui:
496 return AArch64::LDRHHpre;
497 case AArch64::LDRWui:
498 return AArch64::LDRWpre;
499 case AArch64::LDRXui:
500 return AArch64::LDRXpre;
501 case AArch64::LDRSWui:
502 return AArch64::LDRSWpre;
504 return AArch64::LDPSpre;
505 case AArch64::LDPSWi:
506 return AArch64::LDPSWpre;
508 return AArch64::LDPDpre;
510 return AArch64::LDPQpre;
512 return AArch64::LDPWpre;
514 return AArch64::LDPXpre;
516 return AArch64::STPSpre;
518 return AArch64::STPDpre;
520 return AArch64::STPQpre;
522 return AArch64::STPWpre;
524 return AArch64::STPXpre;
526 return AArch64::STGPreIndex;
528 return AArch64::STZGPreIndex;
530 return AArch64::ST2GPreIndex;
531 case AArch64::STZ2Gi:
532 return AArch64::STZ2GPreIndex;
534 return AArch64::STGPpre;
543 case AArch64::LDRBroX:
544 return AArch64::LDRBui;
545 case AArch64::LDRBBroX:
546 return AArch64::LDRBBui;
547 case AArch64::LDRSBXroX:
548 return AArch64::LDRSBXui;
549 case AArch64::LDRSBWroX:
550 return AArch64::LDRSBWui;
551 case AArch64::LDRHroX:
552 return AArch64::LDRHui;
553 case AArch64::LDRHHroX:
554 return AArch64::LDRHHui;
555 case AArch64::LDRSHXroX:
556 return AArch64::LDRSHXui;
557 case AArch64::LDRSHWroX:
558 return AArch64::LDRSHWui;
559 case AArch64::LDRWroX:
560 return AArch64::LDRWui;
561 case AArch64::LDRSroX:
562 return AArch64::LDRSui;
563 case AArch64::LDRSWroX:
564 return AArch64::LDRSWui;
565 case AArch64::LDRDroX:
566 return AArch64::LDRDui;
567 case AArch64::LDRXroX:
568 return AArch64::LDRXui;
569 case AArch64::LDRQroX:
570 return AArch64::LDRQui;
578 case AArch64::STRBui:
579 return AArch64::STRBpost;
580 case AArch64::STRHui:
581 return AArch64::STRHpost;
582 case AArch64::STRSui:
583 case AArch64::STURSi:
584 return AArch64::STRSpost;
585 case AArch64::STRDui:
586 case AArch64::STURDi:
587 return AArch64::STRDpost;
588 case AArch64::STRQui:
589 case AArch64::STURQi:
590 return AArch64::STRQpost;
591 case AArch64::STRBBui:
592 return AArch64::STRBBpost;
593 case AArch64::STRHHui:
594 return AArch64::STRHHpost;
595 case AArch64::STRWui:
596 case AArch64::STURWi:
597 return AArch64::STRWpost;
598 case AArch64::STRXui:
599 case AArch64::STURXi:
600 return AArch64::STRXpost;
601 case AArch64::LDRBui:
602 return AArch64::LDRBpost;
603 case AArch64::LDRHui:
604 return AArch64::LDRHpost;
605 case AArch64::LDRSui:
606 case AArch64::LDURSi:
607 return AArch64::LDRSpost;
608 case AArch64::LDRDui:
609 case AArch64::LDURDi:
610 return AArch64::LDRDpost;
611 case AArch64::LDRQui:
612 case AArch64::LDURQi:
613 return AArch64::LDRQpost;
614 case AArch64::LDRBBui:
615 return AArch64::LDRBBpost;
616 case AArch64::LDRHHui:
617 return AArch64::LDRHHpost;
618 case AArch64::LDRWui:
619 case AArch64::LDURWi:
620 return AArch64::LDRWpost;
621 case AArch64::LDRXui:
622 case AArch64::LDURXi:
623 return AArch64::LDRXpost;
624 case AArch64::LDRSWui:
625 return AArch64::LDRSWpost;
627 return AArch64::LDPSpost;
628 case AArch64::LDPSWi:
629 return AArch64::LDPSWpost;
631 return AArch64::LDPDpost;
633 return AArch64::LDPQpost;
635 return AArch64::LDPWpost;
637 return AArch64::LDPXpost;
639 return AArch64::STPSpost;
641 return AArch64::STPDpost;
643 return AArch64::STPQpost;
645 return AArch64::STPWpost;
647 return AArch64::STPXpost;
649 return AArch64::STGPostIndex;
651 return AArch64::STZGPostIndex;
653 return AArch64::ST2GPostIndex;
654 case AArch64::STZ2Gi:
655 return AArch64::STZ2GPostIndex;
657 return AArch64::STGPpost;
664 unsigned OpcB =
MI.getOpcode();
669 case AArch64::STRSpre:
670 return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
671 case AArch64::STRDpre:
672 return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
673 case AArch64::STRQpre:
674 return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
675 case AArch64::STRWpre:
676 return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
677 case AArch64::STRXpre:
678 return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
679 case AArch64::LDRSpre:
680 return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
681 case AArch64::LDRDpre:
682 return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
683 case AArch64::LDRQpre:
684 return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
685 case AArch64::LDRWpre:
686 return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
687 case AArch64::LDRXpre:
688 return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
689 case AArch64::LDRSWpre:
690 return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);
696 int &MinOffset,
int &MaxOffset) {
714 unsigned PairedRegOp = 0) {
715 assert(PairedRegOp < 2 &&
"Unexpected register operand idx.");
721 return MI.getOperand(Idx);
730 int UnscaledStOffset =
734 int UnscaledLdOffset =
738 return (UnscaledStOffset <= UnscaledLdOffset) &&
739 (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
743 unsigned Opc =
MI.getOpcode();
744 return (
Opc == AArch64::STRWui ||
Opc == AArch64::STURWi ||
745 isNarrowStore(
Opc)) &&
750 switch (
MI.getOpcode()) {
754 case AArch64::LDRBBui:
755 case AArch64::LDRHHui:
756 case AArch64::LDRWui:
757 case AArch64::LDRXui:
759 case AArch64::LDURBBi:
760 case AArch64::LDURHHi:
761 case AArch64::LDURWi:
762 case AArch64::LDURXi:
768 unsigned Opc =
MI.getOpcode();
773 case AArch64::STRBui:
774 case AArch64::STRHui:
775 case AArch64::STRSui:
776 case AArch64::STRDui:
777 case AArch64::STRQui:
778 case AArch64::STRXui:
779 case AArch64::STRWui:
780 case AArch64::STRHHui:
781 case AArch64::STRBBui:
782 case AArch64::LDRBui:
783 case AArch64::LDRHui:
784 case AArch64::LDRSui:
785 case AArch64::LDRDui:
786 case AArch64::LDRQui:
787 case AArch64::LDRXui:
788 case AArch64::LDRWui:
789 case AArch64::LDRHHui:
790 case AArch64::LDRBBui:
794 case AArch64::STZ2Gi:
797 case AArch64::STURSi:
798 case AArch64::STURDi:
799 case AArch64::STURQi:
800 case AArch64::STURWi:
801 case AArch64::STURXi:
802 case AArch64::LDURSi:
803 case AArch64::LDURDi:
804 case AArch64::LDURQi:
805 case AArch64::LDURWi:
806 case AArch64::LDURXi:
809 case AArch64::LDPSWi:
838 unsigned Opc =
MI.getOpcode();
844 case AArch64::LDRBroX:
845 case AArch64::LDRBBroX:
846 case AArch64::LDRSBXroX:
847 case AArch64::LDRSBWroX:
850 case AArch64::LDRHroX:
851 case AArch64::LDRHHroX:
852 case AArch64::LDRSHXroX:
853 case AArch64::LDRSHWroX:
856 case AArch64::LDRWroX:
857 case AArch64::LDRSroX:
858 case AArch64::LDRSWroX:
861 case AArch64::LDRDroX:
862 case AArch64::LDRXroX:
865 case AArch64::LDRQroX:
875 case AArch64::ORRWrs:
876 case AArch64::ADDWri:
884 const LdStPairFlags &Flags) {
886 "Expected promotable zero stores.");
894 if (NextI == MergeMI)
897 unsigned Opc =
I->getOpcode();
898 unsigned MergeMIOpc = MergeMI->getOpcode();
899 bool IsScaled = !
TII->hasUnscaledLdStOffset(
Opc);
900 bool IsMergedMIScaled = !
TII->hasUnscaledLdStOffset(MergeMIOpc);
901 int OffsetStride = IsScaled ?
TII->getMemScale(*
I) : 1;
902 int MergeMIOffsetStride = IsMergedMIScaled ?
TII->getMemScale(*MergeMI) : 1;
904 bool MergeForward =
Flags.getMergeForward();
910 const MachineOperand &BaseRegOp =
912 : AArch64InstrInfo::getLdStBaseOp(*
I);
915 int64_t IOffsetInBytes =
917 int64_t MIOffsetInBytes =
922 if (IOffsetInBytes > MIOffsetInBytes)
923 OffsetImm = MIOffsetInBytes;
925 OffsetImm = IOffsetInBytes;
930 if (!
TII->hasUnscaledLdStOffset(NewOpcode)) {
931 int NewOffsetStride =
TII->getMemScale(NewOpcode);
932 assert(((OffsetImm % NewOffsetStride) == 0) &&
933 "Offset should be a multiple of the store memory scale");
934 OffsetImm = OffsetImm / NewOffsetStride;
940 MachineInstrBuilder MIB;
942 .
addReg(isNarrowStore(
Opc) ? AArch64::WZR : AArch64::XZR)
946 .setMIFlags(
I->mergeFlagsWith(*MergeMI));
949 LLVM_DEBUG(
dbgs() <<
"Creating wider store. Replacing instructions:\n ");
958 I->eraseFromParent();
959 MergeMI->eraseFromParent();
969 auto MBB =
MI.getParent();
977 return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
978 TRI->regsOverlap(MOP.getReg(), DefReg);
992 if (MOP.isReg() && MOP.isKill())
996 if (MOP.isReg() && !MOP.isKill())
997 Units.
addReg(MOP.getReg());
1004 unsigned InstrNumToSet,
1011 unsigned OperandNo = 0;
1012 bool RegFound =
false;
1013 for (
const auto Op : MergedInstr.
operands()) {
1014 if (
Op.getReg() ==
Reg) {
1023 {InstrNumToSet, OperandNo});
1029 const LdStPairFlags &Flags) {
1036 if (NextI == Paired)
1039 int SExtIdx =
Flags.getSExtIdx();
1042 bool IsUnscaled =
TII->hasUnscaledLdStOffset(
Opc);
1043 int OffsetStride = IsUnscaled ?
TII->getMemScale(*
I) : 1;
1045 bool MergeForward =
Flags.getMergeForward();
1047 std::optional<MCPhysReg> RenameReg =
Flags.getRenameReg();
1050 DefinedInBB.
addReg(*RenameReg);
1054 auto GetMatchingSubReg =
1055 [
this, RenameReg](
const TargetRegisterClass *
C) ->
MCPhysReg {
1057 TRI->sub_and_superregs_inclusive(*RenameReg)) {
1058 if (
C->contains(SubOrSuper))
1064 std::function<bool(MachineInstr &,
bool)> UpdateMIs =
1065 [
this, RegToRename, GetMatchingSubReg, MergeForward](MachineInstr &
MI,
1068 bool SeenDef =
false;
1070 MachineOperand &MOP =
MI.getOperand(
OpIdx);
1074 (!MergeForward || !SeenDef ||
1076 TRI->regsOverlap(MOP.
getReg(), RegToRename)) {
1079 "Need renamable operands");
1081 if (
const TargetRegisterClass *RC =
1083 MatchingReg = GetMatchingSubReg(RC);
1087 MatchingReg = GetMatchingSubReg(
1088 TRI->getMinimalPhysRegClass(MOP.
getReg()));
1096 MachineOperand &MOP =
MI.getOperand(
OpIdx);
1098 TRI->regsOverlap(MOP.
getReg(), RegToRename)) {
1101 "Need renamable operands");
1103 if (
const TargetRegisterClass *RC =
1105 MatchingReg = GetMatchingSubReg(RC);
1107 MatchingReg = GetMatchingSubReg(
1108 TRI->getMinimalPhysRegClass(MOP.
getReg()));
1109 assert(MatchingReg != AArch64::NoRegister &&
1110 "Cannot find matching regs for renaming");
1119 TRI, UINT32_MAX, UpdateMIs);
1132 RegToCheck = RegToRename;
1135 MergeForward ? std::next(
I) :
I,
1136 MergeForward ? std::next(Paired) : Paired))
1138 [
this, RegToCheck](
const MachineOperand &MOP) {
1139 return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1141 !TRI->regsOverlap(MOP.getReg(), RegToCheck);
1143 "Rename register used between paired instruction, trashing the "
1153 const MachineOperand &BaseRegOp =
1155 : AArch64InstrInfo::getLdStBaseOp(*
I);
1159 bool PairedIsUnscaled =
TII->hasUnscaledLdStOffset(Paired->getOpcode());
1160 if (IsUnscaled != PairedIsUnscaled) {
1164 int MemSize =
TII->getMemScale(*Paired);
1165 if (PairedIsUnscaled) {
1168 assert(!(PairedOffset %
TII->getMemScale(*Paired)) &&
1169 "Offset should be a multiple of the stride!");
1170 PairedOffset /= MemSize;
1172 PairedOffset *= MemSize;
1179 MachineInstr *RtMI, *Rt2MI;
1180 if (
Offset == PairedOffset + OffsetStride &&
1188 SExtIdx = (SExtIdx + 1) % 2;
1196 assert(!(OffsetImm %
TII->getMemScale(*RtMI)) &&
1197 "Unscaled offset cannot be scaled.");
1198 OffsetImm /=
TII->getMemScale(*RtMI);
1202 MachineInstrBuilder MIB;
1207 MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;
1209 if (RegOp0.
isUse()) {
1210 if (!MergeForward) {
1221 for (
auto It = std::next(
I); It != Paired && PairedRegOp.
isKill(); ++It)
1222 if (It->readsRegister(PairedRegOp.
getReg(),
TRI))
1230 for (MachineInstr &
MI :
1231 make_range(std::next(
I->getIterator()), Paired->getIterator()))
1248 .setMIFlags(
I->mergeFlagsWith(*Paired));
1253 dbgs() <<
"Creating pair load/store. Replacing instructions:\n ");
1258 if (SExtIdx != -1) {
1263 MachineOperand &DstMO = MIB->
getOperand(SExtIdx);
1268 Register DstRegW =
TRI->getSubReg(DstRegX, AArch64::sub_32);
1277 MachineInstrBuilder MIBKill =
1278 BuildMI(*
MBB, InsertionPoint,
DL,
TII->get(TargetOpcode::KILL), DstRegW)
1280 .
addReg(DstRegX, RegState::Define);
1283 MachineInstrBuilder MIBSXTW =
1284 BuildMI(*
MBB, InsertionPoint,
DL,
TII->get(AArch64::SBFMXri), DstRegX)
1324 if (
I->peekDebugInstrNum()) {
1332 unsigned NewInstrNum;
1333 if (DstRegX ==
I->getOperand(0).getReg()) {
1342 if (Paired->peekDebugInstrNum()) {
1350 unsigned NewInstrNum;
1351 if (DstRegX == Paired->getOperand(0).getReg()) {
1364 }
else if (
Opc == AArch64::LDR_ZXI ||
Opc == AArch64::STR_ZXI) {
1370 AArch64::ZPRRegClass.contains(MOp1.
getReg()) &&
"Invalid register.");
1371 MOp0.
setReg(AArch64::Q0 + (MOp0.
getReg() - AArch64::Z0));
1372 MOp1.
setReg(AArch64::Q0 + (MOp1.
getReg() - AArch64::Z0));
1403 if (
I->peekDebugInstrNum()) {
1408 if (Paired->peekDebugInstrNum()) {
1428 SmallSetVector<Register, 4>
Ops;
1429 for (
const MachineOperand &MO :
1431 if (MO.isReg() && MO.isImplicit() && MO.isDef())
1432 Ops.insert(MO.getReg());
1433 for (
const MachineOperand &MO :
1435 if (MO.isReg() && MO.isImplicit() && MO.isDef())
1436 Ops.insert(MO.getReg());
1438 MIB.addDef(
Op, RegState::Implicit);
1440 CopyImplicitOps(
I, Paired);
1443 I->eraseFromParent();
1444 Paired->eraseFromParent();
1453 next_nodbg(LoadI, LoadI->getParent()->end());
1455 int LoadSize =
TII->getMemScale(*LoadI);
1456 int StoreSize =
TII->getMemScale(*StoreI);
1460 bool IsStoreXReg =
TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
1463 TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
1464 "Unexpected RegClass");
1466 MachineInstr *BitExtMI;
1467 if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
1470 if (StRt == LdRt && LoadSize == 8) {
1471 for (MachineInstr &
MI :
make_range(StoreI->getIterator(),
1472 LoadI->getIterator())) {
1473 if (
MI.killsRegister(StRt,
TRI)) {
1474 MI.clearRegisterKills(StRt,
TRI);
1481 LoadI->eraseFromParent();
1486 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1487 TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
1488 .
addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
1497 bool IsUnscaled =
TII->hasUnscaledLdStOffset(*LoadI);
1498 assert(IsUnscaled ==
TII->hasUnscaledLdStOffset(*StoreI) &&
1499 "Unsupported ld/st match");
1500 assert(LoadSize <= StoreSize &&
"Invalid load size");
1501 int UnscaledLdOffset =
1505 int UnscaledStOffset =
1509 int Width = LoadSize * 8;
1512 LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
1515 assert((UnscaledLdOffset >= UnscaledStOffset &&
1516 (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
1519 int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
1520 int Imms = Immr + Width - 1;
1521 if (UnscaledLdOffset == UnscaledStOffset) {
1522 uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12)
1528 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1529 TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
1534 }
else if (IsStoreXReg && Imms == 31) {
1537 assert(Immr <= Imms &&
"Expected LSR alias of UBFM");
1538 BitExtMI =
BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1539 TII->get(AArch64::UBFMWri),
1540 TRI->getSubReg(DestReg, AArch64::sub_32))
1541 .
addReg(
TRI->getSubReg(StRt, AArch64::sub_32))
1547 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1548 TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
1558 for (MachineInstr &
MI :
make_range(StoreI->getIterator(),
1560 if (
MI.killsRegister(StRt,
TRI)) {
1561 MI.clearRegisterKills(StRt,
TRI);
1576 LoadI->eraseFromParent();
1586 if (
Offset % OffsetStride)
1598 return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
1615bool AArch64LoadStoreOpt::findMatchingStore(
1620 MachineInstr &LoadMI = *
I;
1630 ModifiedRegUnits.
clear();
1631 UsedRegUnits.
clear();
1636 MachineInstr &
MI = *
MBBI;
1640 if (!
MI.isTransient())
1666 if (!ModifiedRegUnits.
available(BaseReg))
1684 LdStPairFlags &Flags,
1687 if (
MI.hasOrderedMemoryRef() ||
TII->isLdStPairSuppressed(
MI))
1692 !
TII->isLdStPairSuppressed(FirstMI) &&
1693 "FirstMI shouldn't get here if either of these checks are true.");
1700 unsigned OpcB =
MI.getOpcode();
1708 if (OpcA == AArch64::LDR_ZXI || OpcA == AArch64::STR_ZXI ||
1709 OpcB == AArch64::LDR_ZXI || OpcB == AArch64::STR_ZXI)
1717 bool IsValidLdStrOpc, PairIsValidLdStrOpc;
1719 assert(IsValidLdStrOpc &&
1720 "Given Opc should be a Load or Store with an immediate");
1723 Flags.setSExtIdx(NonSExtOpc == OpcA ? 1 : 0);
1729 if (!PairIsValidLdStrOpc)
1734 if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
1737 TII->getMemScale(FirstMI) ==
TII->getMemScale(
MI);
1746 return TII->hasUnscaledLdStOffset(OpcA) !=
TII->hasUnscaledLdStOffset(OpcB) &&
1755 auto *RegClass =
TRI->getMinimalPhysRegClass(MOP.
getReg());
1762 if (RegClass->HasDisjunctSubRegs && RegClass->CoveredBySubRegs &&
1763 (
TRI->getSubRegisterClass(RegClass, AArch64::dsub0) ||
1764 TRI->getSubRegisterClass(RegClass, AArch64::qsub0) ||
1765 TRI->getSubRegisterClass(RegClass, AArch64::zsub0))) {
1768 <<
" Cannot rename operands with multiple disjunct subregisters ("
1779 return TRI->isSuperOrSubRegisterEq(
1802 return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1803 MOP.isImplicit() && MOP.isKill() &&
1804 TRI->regsOverlap(RegToRename, MOP.getReg());
1810 bool FoundDef =
false;
1841 if (
MI.isPseudo()) {
1842 LLVM_DEBUG(
dbgs() <<
" Cannot rename pseudo/bundle instruction\n");
1846 for (
auto &MOP :
MI.operands()) {
1848 !
TRI->regsOverlap(MOP.
getReg(), RegToRename))
1858 for (
auto &MOP :
MI.operands()) {
1860 !
TRI->regsOverlap(MOP.
getReg(), RegToRename))
1877 LLVM_DEBUG(
dbgs() <<
" Did not find definition for register in BB\n");
1905 LLVM_DEBUG(dbgs() <<
"Checking " << MI);
1907 if (MI.getFlag(MachineInstr::FrameSetup)) {
1908 LLVM_DEBUG(dbgs() <<
" Cannot rename framesetup instructions "
1913 for (
auto &MOP :
MI.operands()) {
1914 if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1915 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1917 if (!canRenameMOP(MOP, TRI)) {
1918 LLVM_DEBUG(dbgs() <<
" Cannot rename " << MOP <<
" in " << MI);
1944 auto AnySubOrSuperRegCalleePreserved = [&MF,
TRI](
MCPhysReg PR) {
1945 return any_of(
TRI->sub_and_superregs_inclusive(PR),
1947 return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
1953 auto CanBeUsedForAllClasses = [&RequiredClasses,
TRI](
MCPhysReg PR) {
1956 TRI->sub_and_superregs_inclusive(PR),
1957 [
C](
MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); });
1961 auto *RegClass =
TRI->getMinimalPhysRegClass(
Reg);
1964 !
RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
1965 CanBeUsedForAllClasses(PR)) {
1973 <<
TRI->getRegClassName(RegClass) <<
"\n");
1974 return std::nullopt;
1985 std::optional<MCPhysReg> RenameReg;
1994 const bool IsLoad = FirstMI.
mayLoad();
1996 if (!MaybeCanRename) {
1999 RequiredClasses,
TRI)};
2005 if (*MaybeCanRename) {
2007 RequiredClasses,
TRI);
2016 LdStPairFlags &Flags,
unsigned Limit,
2017 bool FindNarrowMerge) {
2021 MachineInstr &FirstMI = *
I;
2025 bool IsUnscaled =
TII->hasUnscaledLdStOffset(FirstMI);
2029 int OffsetStride = IsUnscaled ?
TII->getMemScale(FirstMI) : 1;
2032 std::optional<bool> MaybeCanRename;
2034 MaybeCanRename = {
false};
2036 SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
2037 LiveRegUnits UsedInBetween;
2040 Flags.clearRenameReg();
2044 ModifiedRegUnits.
clear();
2045 UsedRegUnits.
clear();
2048 SmallVector<MachineInstr *, 4> MemInsns;
2053 MachineInstr &
MI = *
MBBI;
2060 if (!
MI.isTransient())
2063 Flags.setSExtIdx(-1);
2066 assert(
MI.mayLoadOrStore() &&
"Expected memory operation.");
2075 bool MIIsUnscaled =
TII->hasUnscaledLdStOffset(
MI);
2076 if (IsUnscaled != MIIsUnscaled) {
2080 int MemSize =
TII->getMemScale(
MI);
2084 if (MIOffset % MemSize) {
2090 MIOffset /= MemSize;
2092 MIOffset *= MemSize;
2098 if (BaseReg == MIBaseReg) {
2104 bool IsOutOfBounds = MIOffset !=
TII->getMemScale(
MI);
2105 bool IsBaseRegUsed = !UsedRegUnits.
available(
2107 bool IsBaseRegModified = !ModifiedRegUnits.
available(
2112 bool IsMIRegTheSame =
2115 if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
2123 if ((
Offset != MIOffset + OffsetStride) &&
2124 (
Offset + OffsetStride != MIOffset)) {
2133 if (FindNarrowMerge) {
2138 if ((!IsUnscaled &&
alignTo(MinOffset, 2) != MinOffset) ||
2155 <<
"keep looking.\n");
2161 if (IsUnscaled && (
alignTo(MinOffset, OffsetStride) != MinOffset)) {
2166 <<
"Offset doesn't fit due to alignment requirements, "
2167 <<
"keep looking.\n");
2178 if (!ModifiedRegUnits.
available(BaseReg))
2181 const bool SameLoadReg =
MayLoad &&
TRI->isSuperOrSubRegisterEq(
2188 bool RtNotModified =
2190 bool RtNotUsed = !(
MI.mayLoad() && !SameLoadReg &&
2193 LLVM_DEBUG(
dbgs() <<
"Checking, can combine 2nd into 1st insn:\n"
2195 << (RtNotModified ?
"true" :
"false") <<
"\n"
2197 << (RtNotUsed ?
"true" :
"false") <<
"\n");
2199 if (RtNotModified && RtNotUsed && !
mayAlias(
MI, MemInsns, AA)) {
2204 std::optional<MCPhysReg> RenameReg =
2206 Reg, DefinedInBB, UsedInBetween,
2207 RequiredClasses,
TRI);
2213 <<
"keep looking.\n");
2216 Flags.setRenameReg(*RenameReg);
2219 Flags.setMergeForward(
false);
2221 Flags.clearRenameReg();
2232 LLVM_DEBUG(
dbgs() <<
"Checking, can combine 1st into 2nd insn:\n"
2234 <<
"' not modified: "
2235 << (RtNotModified ?
"true" :
"false") <<
"\n");
2237 if (RtNotModified && !
mayAlias(FirstMI, MemInsns, AA)) {
2239 Flags.setMergeForward(
true);
2240 Flags.clearRenameReg();
2245 MaybeCanRename, FirstMI,
MI,
Reg, DefinedInBB, UsedInBetween,
2246 RequiredClasses,
TRI);
2248 Flags.setMergeForward(
true);
2249 Flags.setRenameReg(*RenameReg);
2250 MBBIWithRenameReg =
MBBI;
2253 LLVM_DEBUG(
dbgs() <<
"Unable to combine these instructions due to "
2254 <<
"interference in between, keep looking.\n");
2258 if (
Flags.getRenameReg())
2259 return MBBIWithRenameReg;
2273 if (!ModifiedRegUnits.
available(BaseReg)) {
2279 if (
MI.mayLoadOrStore())
2287 assert((
MI.getOpcode() == AArch64::SUBXri ||
2288 MI.getOpcode() == AArch64::ADDXri) &&
2289 "Expected a register update instruction");
2290 auto End =
MI.getParent()->end();
2291 if (MaybeCFI == End ||
2292 MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
2295 MI.getOperand(0).getReg() != AArch64::SP)
2299 unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
2310std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(
2312 bool IsForward,
bool IsPreIdx,
bool MergeEither) {
2313 assert((Update->getOpcode() == AArch64::ADDXri ||
2314 Update->getOpcode() == AArch64::SUBXri) &&
2315 "Unexpected base register update instruction to merge!");
2331 if (std::any_of(std::next(CFI),
I, [](
const auto &Insn) {
2332 return Insn.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
2334 return std::nullopt;
2336 MachineBasicBlock *
MBB = InsertPt->getParent();
2345 if (NextI == Update)
2348 int Value = Update->getOperand(2).getImm();
2350 "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
2351 if (Update->getOpcode() == AArch64::SUBXri)
2356 MachineInstrBuilder MIB;
2357 int Scale, MinOffset, MaxOffset;
2361 MIB =
BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2363 .
add(Update->getOperand(0))
2371 MIB =
BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2373 .
add(Update->getOperand(0))
2398 I->eraseFromParent();
2399 Update->eraseFromParent();
2407 unsigned Offset,
int Scale) {
2408 assert((Update->getOpcode() == AArch64::MOVKWi) &&
2409 "Unexpected const mov instruction to merge!");
2413 MachineInstr &MemMI = *
I;
2414 unsigned Mask = (1 << 12) * Scale - 1;
2419 MachineInstrBuilder AddMIB, MemMIB;
2423 BuildMI(*
I->getParent(),
I,
I->getDebugLoc(),
TII->get(AArch64::ADDXri))
2431 MemMIB =
BuildMI(*
I->getParent(),
I,
I->getDebugLoc(),
TII->get(NewOpc))
2439 ++NumConstOffsetFolded;
2454 I->eraseFromParent();
2455 PrevI->eraseFromParent();
2456 Update->eraseFromParent();
2461bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
2463 unsigned BaseReg,
int Offset) {
2464 switch (
MI.getOpcode()) {
2467 case AArch64::SUBXri:
2468 case AArch64::ADDXri:
2471 if (!
MI.getOperand(2).isImm())
2479 if (
MI.getOperand(0).getReg() != BaseReg ||
2480 MI.getOperand(1).getReg() != BaseReg)
2483 int UpdateOffset =
MI.getOperand(2).getImm();
2484 if (
MI.getOpcode() == AArch64::SUBXri)
2485 UpdateOffset = -UpdateOffset;
2489 int Scale, MinOffset, MaxOffset;
2491 if (UpdateOffset % Scale != 0)
2495 int ScaledOffset = UpdateOffset / Scale;
2496 if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
2508bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
2514 if (
MI.getOpcode() == AArch64::MOVKWi &&
2515 TRI->isSuperOrSubRegisterEq(IndexReg,
MI.getOperand(1).getReg())) {
2524 MachineInstr &MovzMI = *
MBBI;
2526 if (MovzMI.
getOpcode() == AArch64::MOVZWi &&
2529 unsigned High =
MI.getOperand(2).getImm() <<
MI.getOperand(3).getImm();
2532 return Offset >> 24 == 0;
2541 MachineInstr &MemMI = *
I;
2546 TII->getMemScale(MemMI);
2551 if (MIUnscaledOffset != UnscaledOffset)
2562 for (
unsigned i = 0, e = IsPairedInsn ? 2 : 1; i !=
e; ++i) {
2564 if (DestReg == BaseReg ||
TRI->isSubRegister(BaseReg, DestReg))
2571 ModifiedRegUnits.
clear();
2572 UsedRegUnits.
clear();
2578 const bool BaseRegSP =
BaseReg == AArch64::SP;
2587 MachineBasicBlock *CurMBB =
I->getParent();
2594 MachineInstr &
MI = *
MBBI;
2598 if (!
MI.isTransient())
2602 if (isMatchingUpdateInsn(*
I,
MI, BaseReg, UnscaledOffset))
2613 if (!ModifiedRegUnits.
available(BaseReg) ||
2615 (BaseRegSP &&
MBBI->mayLoadOrStore()))
2619 if (!VisitSucc || Limit <=
Count)
2624 MachineBasicBlock *SuccToVisit =
nullptr;
2625 unsigned LiveSuccCount = 0;
2626 for (MachineBasicBlock *Succ : CurMBB->
successors()) {
2627 for (MCRegAliasIterator AI(BaseReg,
TRI,
true); AI.isValid(); ++AI) {
2628 if (Succ->isLiveIn(*AI)) {
2629 if (LiveSuccCount++)
2631 if (Succ->pred_size() == 1)
2639 CurMBB = SuccToVisit;
2650 MachineInstr &MemMI = *
I;
2652 MachineFunction &MF = *MemMI.
getMF();
2660 : AArch64::NoRegister};
2669 for (
unsigned i = 0, e = IsPairedInsn ? 2 : 1; i !=
e; ++i)
2670 if (DestReg[i] == BaseReg ||
TRI->isSubRegister(BaseReg, DestReg[i]))
2674 const bool BaseRegSP =
BaseReg == AArch64::SP;
2682 const AArch64Subtarget &Subtarget = MF.
getSubtarget<AArch64Subtarget>();
2683 unsigned RedZoneSize =
2688 ModifiedRegUnits.
clear();
2689 UsedRegUnits.
clear();
2691 bool MemAccessBeforeSPPreInc =
false;
2695 MachineInstr &
MI = *
MBBI;
2699 if (!
MI.isTransient())
2703 if (isMatchingUpdateInsn(*
I,
MI, BaseReg,
Offset)) {
2706 if (MemAccessBeforeSPPreInc &&
MBBI->getOperand(2).getImm() > RedZoneSize)
2716 if (!ModifiedRegUnits.
available(BaseReg) ||
2724 if (
MI.mayLoadOrStore() ||
MI.hasUnmodeledSideEffects() ||
2725 (DestReg[0] != AArch64::NoRegister &&
2726 !(ModifiedRegUnits.
available(DestReg[0]) &&
2728 (DestReg[1] != AArch64::NoRegister &&
2729 !(ModifiedRegUnits.
available(DestReg[1]) &&
2731 MergeEither =
false;
2736 if (BaseRegSP &&
MBBI->mayLoadOrStore())
2737 MemAccessBeforeSPPreInc =
true;
2743AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
2747 MachineInstr &MemMI = *
I;
2766 ModifiedRegUnits.
clear();
2767 UsedRegUnits.
clear();
2771 MachineInstr &
MI = *
MBBI;
2775 if (!
MI.isTransient())
2779 if (isMatchingMovConstInsn(*
I,
MI, IndexReg,
Offset)) {
2788 if (!ModifiedRegUnits.
available(IndexReg) ||
2796bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
2798 MachineInstr &
MI = *
MBBI;
2800 if (
MI.hasOrderedMemoryRef())
2814 ++NumLoadsFromStoresPromoted;
2818 MBBI = promoteLoadFromStore(
MBBI, StoreI);
2825bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
2828 MachineInstr &
MI = *
MBBI;
2831 if (!
TII->isCandidateToMergeOrPair(
MI))
2835 LdStPairFlags
Flags;
2839 ++NumZeroStoresPromoted;
2843 MBBI = mergeNarrowZeroStores(
MBBI, MergeMI, Flags);
2852 MachineInstr &
MI = *
MBBI;
2855 if (!
TII->isCandidateToMergeOrPair(
MI))
2859 if (
MI.mayLoad() && Subtarget->hasDisableLdp())
2863 if (
MI.mayStore() && Subtarget->hasDisableStp())
2869 bool IsUnscaled =
TII->hasUnscaledLdStOffset(
MI);
2871 int OffsetStride = IsUnscaled ?
TII->getMemScale(
MI) : 1;
2879 LdStPairFlags
Flags;
2885 auto Prev = std::prev(
MBBI);
2889 MachineMemOperand *MemOp =
2890 MI.memoperands_empty() ? nullptr :
MI.memoperands().front();
2895 if ((
MI.mayLoad() && Subtarget->hasLdpAlignedOnly()) ||
2896 (
MI.mayStore() && Subtarget->hasStpAlignedOnly())) {
2898 if (!MemOp || !MemOp->getMemoryType().isValid()) {
2899 NumFailedAlignmentCheck++;
2905 uint64_t MemAlignment = MemOp->getAlign().value();
2906 uint64_t TypeAlignment =
2907 Align(MemOp->getSize().getValue().getKnownMinValue()).value();
2909 if (MemAlignment < 2 * TypeAlignment) {
2910 NumFailedAlignmentCheck++;
2916 if (
TII->hasUnscaledLdStOffset(
MI))
2917 ++NumUnscaledPairCreated;
2919 MBBI = mergePairedInsns(
MBBI, Paired, Flags);
2922 for (
auto I = std::next(Prev);
I !=
MBBI;
I++)
2930bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
2932 MachineInstr &
MI = *
MBBI;
2941 if (
MBBI->hasOrderedMemoryRef() &&
2954 if (
auto NextI = mergeUpdateInsn(
MBBI, Update,
false,
2963 if (
TII->hasUnscaledLdStOffset(
MI.getOpcode()))
2972 Update = findMatchingUpdateInsnBackward(
MBBI,
UpdateLimit, MergeEither);
2975 if (
auto NextI = mergeUpdateInsn(
MBBI, Update,
true,
2976 true, MergeEither)) {
2985 int UnscaledOffset =
2993 Update = findMatchingUpdateInsnForward(
MBBI, UnscaledOffset,
UpdateLimit);
2996 if (
auto NextI = mergeUpdateInsn(
MBBI, Update,
false,
3009 MachineInstr &
MI = *
MBBI;
3014 if (
TII->hasUnscaledLdStOffset(
MI.getOpcode()))
3026 if (Update !=
E && (
Offset & (Scale - 1)) == 0) {
3038 switch (GPRStoreOpc) {
3040 case AArch64::STRBBui:
3041 return AArch64::STRBui;
3042 case AArch64::STRHHui:
3043 return AArch64::STRHui;
3044 case AArch64::STRWui:
3045 return AArch64::STRSui;
3046 case AArch64::STRXui:
3047 return AArch64::STRDui;
3049 case AArch64::STURBBi:
3050 return AArch64::STURBi;
3051 case AArch64::STURHHi:
3052 return AArch64::STURHi;
3053 case AArch64::STURWi:
3054 return AArch64::STURSi;
3055 case AArch64::STURXi:
3056 return AArch64::STURDi;
3058 case AArch64::STRBBroW:
3059 return AArch64::STRBroW;
3060 case AArch64::STRBBroX:
3061 return AArch64::STRBroX;
3062 case AArch64::STRHHroW:
3063 return AArch64::STRHroW;
3064 case AArch64::STRHHroX:
3065 return AArch64::STRHroX;
3066 case AArch64::STRWroW:
3067 return AArch64::STRSroW;
3068 case AArch64::STRWroX:
3069 return AArch64::STRSroX;
3070 case AArch64::STRXroW:
3071 return AArch64::STRDroW;
3072 case AArch64::STRXroX:
3073 return AArch64::STRDroX;
3083 case AArch64::UMOVvi8_idx0:
3084 return AArch64::bsub;
3085 case AArch64::UMOVvi16_idx0:
3086 return AArch64::hsub;
3087 case AArch64::UMOVvi32_idx0:
3088 return AArch64::ssub;
3089 case AArch64::UMOVvi64_idx0:
3090 return AArch64::dsub;
3096bool AArch64LoadStoreOpt::tryToReplaceUMOVStore(
3098 MachineInstr &StoreMI = *
MBBI;
3121 MachineInstr *UMOVMI =
nullptr;
3123 unsigned SubRegIdx = 0;
3125 for (
auto It =
MBBI; It !=
B;) {
3126 MachineInstr &
MI = *--It;
3127 if (
MI.isDebugInstr())
3131 if (
MI.readsRegister(StoreValReg,
TRI))
3133 if (
MI.modifiesRegister(StoreValReg,
TRI)) {
3151 if (It->modifiesRegister(VecReg,
TRI))
3153 if (!VecRegKilled && It->killsRegister(VecReg,
TRI))
3154 VecRegKilled =
true;
3161 It->clearRegisterKills(VecReg,
TRI);
3163 LLVM_DEBUG(
dbgs() <<
"Folding UMOV + store: " << *UMOVMI <<
" + "
3176 ++NumUMOVFoldedToFPRStore;
3180bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &
MBB,
3181 bool EnableNarrowZeroStOpt) {
3213 if (EnableNarrowZeroStOpt)
3237 DefinedInBB.
clear();
3246 if (
TII->isPairableLdStInst(*
MBBI) && tryToPairLdStInst(
MBBI))
3290 if (tryToReplaceUMOVStore(
MBBI))
3299bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
3312 bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
3313 for (
auto &
MBB : Fn) {
3331bool AArch64LoadStoreOptLegacy::runOnMachineFunction(MachineFunction &MF) {
3334 AArch64LoadStoreOpt Impl;
3335 Impl.AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
3336 return Impl.runOnMachineFunction(MF);
3342 return new AArch64LoadStoreOptLegacy();
3348 AArch64LoadStoreOpt Impl;
3352 bool Changed = Impl.runOnMachineFunction(MF);
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static cl::opt< bool > EnableRenaming("aarch64-load-store-renaming", cl::init(true), cl::Hidden)
static MachineOperand & getLdStRegOp(MachineInstr &MI, unsigned PairedRegOp=0)
static bool isPromotableLoadFromStore(MachineInstr &MI)
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale, int &MinOffset, int &MaxOffset)
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride)
static unsigned getMatchingPairOpcode(unsigned Opc)
static unsigned getGPRToFPRStoreOpcode(unsigned GPRStoreOpc)
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, LdStPairFlags &Flags, const AArch64InstrInfo *TII)
static std::optional< MCPhysReg > tryToFindRegisterToRename(const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static bool needsWinCFI(const MachineFunction *MF)
static bool canRenameUntilSecondLoad(MachineInstr &FirstLoad, MachineInstr &SecondLoad, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static std::optional< MCPhysReg > findRenameRegForSameLdStRegPair(std::optional< bool > MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
static cl::opt< unsigned > LdStLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden)
static bool canRenameMOP(const MachineOperand &MOP, const TargetRegisterInfo *TRI)
static bool isRewritableImplicitDef(const MachineOperand &MO)
static unsigned getPreIndexedOpcode(unsigned Opc)
#define AARCH64_LOAD_STORE_OPT_NAME
static void addDebugSubstitutionsToTable(MachineFunction *MF, unsigned InstrNumToSet, MachineInstr &OriginalInstr, MachineInstr &MergedInstr)
This function will add a new entry into the debugValueSubstitutions table when two instruction have b...
static cl::opt< unsigned > UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden)
static bool isPromotableZeroStoreInst(MachineInstr &MI)
static unsigned getMatchingWideOpcode(unsigned Opc)
static unsigned getMatchingNonSExtOpcode(unsigned Opc, bool *IsValidLdStrOpc=nullptr)
static MachineBasicBlock::iterator maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI)
static bool isTagStore(const MachineInstr &MI)
static unsigned isMatchingStore(MachineInstr &LoadInst, MachineInstr &StoreInst)
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg, const TargetRegisterInfo *TRI, unsigned Limit, std::function< bool(MachineInstr &, bool)> &Fn)
static unsigned getPostIndexedOpcode(unsigned Opc)
static unsigned getUMOVSubRegIdx(unsigned UMOVOpc)
static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI)
static cl::opt< unsigned > LdStConstLimit("aarch64-load-store-const-scan-limit", cl::init(10), cl::Hidden)
static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst, MachineInstr &StoreInst, const AArch64InstrInfo *TII)
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI)
static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale)
static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units, const TargetRegisterInfo *TRI)
static bool canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static cl::opt< unsigned > UMOVFoldLimit("aarch64-umov-fold-scan-limit", cl::init(16), cl::Hidden)
static unsigned getBaseAddressOpcode(unsigned Opc)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
const HexagonInstrInfo * TII
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operator of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operator of a load/store.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
const AArch64RegisterInfo * getRegisterInfo() const override
const AArch64InstrInfo * getInstrInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
bool isLittleEndian() const
unsigned getRedZoneSize(const Function &F) const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
Represents analyses that only rely on functions' control flow.
static bool shouldExecute(CounterInfo &Counter)
FunctionPass class - This class is used to implement most global optimizations.
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
A set of register units used to track register liveness.
static void accumulateUsedDefed(const MachineInstr &MI, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI)
For a machine instruction MI, adds all register units used in UsedRegUnits and defined or clobbered i...
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
void removeReg(MCRegister Reg)
Removes all register units covered by physical register Reg.
LLVM_ABI void addLiveIns(const MachineBasicBlock &MBB)
Adds registers living into block MBB.
void clear()
Clears the set.
LLVM_ABI void accumulate(const MachineInstr &MI)
Adds all register units used, defined or clobbered in MI.
An instruction for reading from memory.
bool usesWindowsCFI() const
OpType getOperation() const
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution between one <instr,operand> value to a different, new value.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
LLVM_ABI bool mayAlias(BatchAAResults *AA, const MachineInstr &Other, bool UseTBAA) const
Returns true if this instruction's memory access aliases the memory access of Other.
unsigned peekDebugInstrNum() const
Examine the instruction number of this MachineInstr.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
LLVM_ABI void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)
Assign this MachineInstr's memory reference descriptor list.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
LLVM_ABI void dump() const
LLVM_ABI unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
LLVM_ABI void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo)
Clear all kill flags affecting Reg.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
MachineOperand class - Representation of each machine instruction operand.
void setImplicit(bool Val=true)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
LLVM_ABI bool isRenamable() const
isRenamable - Returns true if this register may be renamed, i.e.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
bool isEarlyClobber() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Wrapper class representing virtual and physical registers.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
const MCAsmInfo & getMCAsmInfo() const
Return target specific asm information.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
self_iterator getIterator()
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr RegState getKillRegState(bool B)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< filter_iterator< ConstMIBundleOperands, bool(*)(const MachineOperand &)> > phys_regs_and_masks(const MachineInstr &MI)
Returns an iterator range over all physical register and mask operands for MI and bundled instruction...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
FunctionPass * createAArch64LoadStoreOptLegacyPass()
createAArch64LoadStoreOptimizationPass - returns an instance of the load / store optimization pass.
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
FunctionAddr VTableAddr Count
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
DWARFExpression::Operation Op
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.