#define DEBUG_TYPE "aarch64-ldst-opt"

STATISTIC(NumPairCreated,
          "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
          "Number of load/store from unscaled generated");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
STATISTIC(NumFailedAlignmentCheck,
          "Number of load/store pair transformations that did not pass "
          "the alignment check");
STATISTIC(NumConstOffsetFolded,
          "Number of const offsets of index addresses folded");

DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
              "Controls which pairs are considered for renaming");
#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
using LdStPairFlags = struct LdStPairFlags {
  // If a matching instruction is found, MergeForward is set to true if the
  // merge is to remove the first instruction and replace the second with
  // a pair-wise insn, and false if the reverse is true.
  bool MergeForward = false;

  // SExtIdx gives the index of the result of the load pair that must be
  // extended. The value of SExtIdx assumes that the paired instructions
  // are in the same order as they appear; a value of -1 means none is
  // extended.
  int SExtIdx = -1;

  // If not std::nullopt, RenameReg can be used to rename the register of the
  // first store in a pair.
  std::optional<MCPhysReg> RenameReg;

  LdStPairFlags() = default;

  void setMergeForward(bool V = true) { MergeForward = V; }
  bool getMergeForward() const { return MergeForward; }

  void setSExtIdx(int V) { SExtIdx = V; }
  int getSExtIdx() const { return SExtIdx; }

  void setRenameReg(MCPhysReg R) { RenameReg = R; }
  void clearRenameReg() { RenameReg = std::nullopt; }
  std::optional<MCPhysReg> getRenameReg() const { return RenameReg; }
};
struct AArch64LoadStoreOpt : public MachineFunctionPass {
  // ...

  // Scan the instructions looking for a load/store that can be combined
  // with the current instruction into a wider load/store pair.
  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
                                               LdStPairFlags &Flags,
                                               unsigned Limit,
                                               bool FindNarrowMerge);

  // Merge two adjacent zero stores into a single wider store.
  MachineBasicBlock::iterator
  mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                        MachineBasicBlock::iterator MergeMI,
                        const LdStPairFlags &Flags);

  // Merge the two instructions indicated into a single pair-wise instruction.
  MachineBasicBlock::iterator
  mergePairedInsns(MachineBasicBlock::iterator I,
                   MachineBasicBlock::iterator Paired,
                   const LdStPairFlags &Flags);

  // Scan forward for an instruction that updates the base register of a
  // load/store by the given (unscaled) offset.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
                                int UnscaledOffset, unsigned Limit);

  bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
                            unsigned BaseReg, int Offset);
  bool isMatchingMovConstInsn(MachineInstr &MemMI, MachineInstr &MI,
                              unsigned IndexReg, unsigned &Offset);

  // Fold a base register update into the load/store, producing a pre- or
  // post-indexed form.
  std::optional<MachineBasicBlock::iterator>
  mergeUpdateInsn(MachineBasicBlock::iterator I,
                  MachineBasicBlock::iterator Update, bool IsForward,
                  bool IsPreIdx, bool MergeEither);

  // ...
};
char AArch64LoadStoreOpt::ID = 0;
static bool isNarrowStore(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
    return true;
  }
}

// These instructions set the memory tag and either keep memory contents
// unchanged or set them to zero, ignoring the address part of the source
// register.
static bool isTagStore(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::STGi:
  case AArch64::STZGi:
  case AArch64::ST2Gi:
  case AArch64::STZ2Gi:
    return true;
  }
}
static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                         bool *IsValidLdStrOpc = nullptr) {
  if (IsValidLdStrOpc)
    *IsValidLdStrOpc = true;
  switch (Opc) {
  default:
    if (IsValidLdStrOpc)
      *IsValidLdStrOpc = false;
    return std::numeric_limits<unsigned>::max();
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRDpre:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRQpre:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
  case AArch64::STRWui:
  case AArch64::STRWpre:
  case AArch64::STURWi:
  case AArch64::STRXui:
  case AArch64::STRXpre:
  case AArch64::STURXi:
  case AArch64::STR_ZXI:
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRDpre:
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDRQpre:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::LDRWpre:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::LDRXpre:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::STRSpre:
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSpre:
  case AArch64::LDR_ZXI:
    return Opc;
  case AArch64::LDRSWui:
    return AArch64::LDRWui;
  case AArch64::LDURSWi:
    return AArch64::LDURWi;
  case AArch64::LDRSWpre:
    return AArch64::LDRWpre;
  }
}
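// Illustrative example (not from the original source): mapping LDRSW to its
// zero-extending counterpart LDRW lets the pass pair a sign-extending load
// with a plain 32-bit load and re-apply the extension afterwards:
//
//   ldrsw x0, [x2]              ldp  w0, w1, [x2]
//   ldr   w1, [x2, #4]    =>    sxtw x0, w0
//
// The SExtIdx flag records which result of the pair still needs the sign
// extension (emitted later as an SBFMXri, of which sxtw is an alias).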
static unsigned getMatchingWideOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no wide equivalent!");
  case AArch64::STRBBui:
    return AArch64::STRHHui;
  case AArch64::STRHHui:
    return AArch64::STRWui;
  case AArch64::STURBBi:
    return AArch64::STURHHi;
  case AArch64::STURHHi:
    return AArch64::STURWi;
  case AArch64::STURWi:
    return AArch64::STURXi;
  case AArch64::STRWui:
    return AArch64::STRXui;
  }
}
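// Illustrative example (not from the original source): the widening above is
// what lets two adjacent narrow zero stores become one wider store of
// WZR/XZR, e.g.
//
//   strh wzr, [x0]
//   strh wzr, [x0, #2]    =>    str wzr, [x0]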
static unsigned getMatchingPairOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pairwise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STPSi;
  case AArch64::STRSpre:
    return AArch64::STPSpre;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STPDi;
  case AArch64::STRDpre:
    return AArch64::STPDpre;
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STR_ZXI:
    return AArch64::STPQi;
  case AArch64::STRQpre:
    return AArch64::STPQpre;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STPWi;
  case AArch64::STRWpre:
    return AArch64::STPWpre;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STPXi;
  case AArch64::STRXpre:
    return AArch64::STPXpre;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDPSi;
  case AArch64::LDRSpre:
    return AArch64::LDPSpre;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDPDi;
  case AArch64::LDRDpre:
    return AArch64::LDPDpre;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDR_ZXI:
    return AArch64::LDPQi;
  case AArch64::LDRQpre:
    return AArch64::LDPQpre;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDPWi;
  case AArch64::LDRWpre:
    return AArch64::LDPWpre;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDPXi;
  case AArch64::LDRXpre:
    return AArch64::LDPXpre;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return AArch64::LDPSWi;
  case AArch64::LDRSWpre:
    return AArch64::LDPSWpre;
  }
}
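// Illustrative example (not from the original source): the canonical
// transformation this table enables, using the X-register variants:
//
//   ldr x0, [x2]
//   ldr x1, [x2, #8]    =>    ldp x0, x1, [x2]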
static bool isMatchingStore(MachineInstr &LoadInst, MachineInstr &StoreInst) {
  unsigned LdOpc = LoadInst.getOpcode();
  unsigned StOpc = StoreInst.getOpcode();
  switch (LdOpc) {
  default:
    llvm_unreachable("Unsupported load instruction!");
  case AArch64::LDRBBui:
    return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
           StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURBBi:
    return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
           StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRHHui:
    return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
           StOpc == AArch64::STRXui;
  case AArch64::LDURHHi:
    return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
           StOpc == AArch64::STURXi;
  case AArch64::LDRWui:
    return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURWi:
    return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRXui:
    return StOpc == AArch64::STRXui;
  case AArch64::LDURXi:
    return StOpc == AArch64::STURXi;
  }
}
static unsigned getPreIndexedOpcode(unsigned Opc) {
  // Unscaled (STUR*/LDUR*) variants have no pre-indexed equivalent here;
  // only the "unsigned offset" opcodes are mapped.
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pre-indexed equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpre;
  case AArch64::STRDui:
    return AArch64::STRDpre;
  case AArch64::STRQui:
    return AArch64::STRQpre;
  case AArch64::STRBBui:
    return AArch64::STRBBpre;
  case AArch64::STRHHui:
    return AArch64::STRHHpre;
  case AArch64::STRWui:
    return AArch64::STRWpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  case AArch64::LDRSui:
    return AArch64::LDRSpre;
  case AArch64::LDRDui:
    return AArch64::LDRDpre;
  case AArch64::LDRQui:
    return AArch64::LDRQpre;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpre;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpre;
  case AArch64::LDRWui:
    return AArch64::LDRWpre;
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpre;
  case AArch64::LDPSi:
    return AArch64::LDPSpre;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpre;
  case AArch64::LDPDi:
    return AArch64::LDPDpre;
  case AArch64::LDPQi:
    return AArch64::LDPQpre;
  case AArch64::LDPWi:
    return AArch64::LDPWpre;
  case AArch64::LDPXi:
    return AArch64::LDPXpre;
  case AArch64::STPSi:
    return AArch64::STPSpre;
  case AArch64::STPDi:
    return AArch64::STPDpre;
  case AArch64::STPQi:
    return AArch64::STPQpre;
  case AArch64::STPWi:
    return AArch64::STPWpre;
  case AArch64::STPXi:
    return AArch64::STPXpre;
  case AArch64::STGi:
    return AArch64::STGPreIndex;
  case AArch64::STZGi:
    return AArch64::STZGPreIndex;
  case AArch64::ST2Gi:
    return AArch64::ST2GPreIndex;
  case AArch64::STZ2Gi:
    return AArch64::STZ2GPreIndex;
  case AArch64::STGPi:
    return AArch64::STGPpre;
  }
}
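// Illustrative example (not from the original source): a base register
// increment ahead of the access folds into a pre-indexed (writeback) form:
//
//   add x1, x1, #16
//   str x0, [x1]        =>    str x0, [x1, #16]!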
// Returns the unsigned-immediate ("base address") equivalent of a
// register-offset (roX) load, used when the index register turns out to hold
// a constant.
static unsigned getBaseAddressOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no base address equivalent!");
  case AArch64::LDRBroX:
    return AArch64::LDRBui;
  case AArch64::LDRBBroX:
    return AArch64::LDRBBui;
  case AArch64::LDRSBXroX:
    return AArch64::LDRSBXui;
  case AArch64::LDRSBWroX:
    return AArch64::LDRSBWui;
  case AArch64::LDRHroX:
    return AArch64::LDRHui;
  case AArch64::LDRHHroX:
    return AArch64::LDRHHui;
  case AArch64::LDRSHXroX:
    return AArch64::LDRSHXui;
  case AArch64::LDRSHWroX:
    return AArch64::LDRSHWui;
  case AArch64::LDRWroX:
    return AArch64::LDRWui;
  case AArch64::LDRSroX:
    return AArch64::LDRSui;
  case AArch64::LDRSWroX:
    return AArch64::LDRSWui;
  case AArch64::LDRDroX:
    return AArch64::LDRDui;
  case AArch64::LDRXroX:
    return AArch64::LDRXui;
  case AArch64::LDRQroX:
    return AArch64::LDRQui;
  }
}
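// Illustrative example (not from the original source): when the index
// register of a register-offset load is a constant built by MOVZ/MOVK, the
// access can be rewritten using the unsigned-immediate opcode above:
//
//   movz w8, #(Lo)
//   movk w8, #(Hi), lsl #16        add x8, x1, #(high part), lsl #12
//   ldr  x0, [x1, x8]        =>    ldr x0, [x8, #(low part)]
//
// (Lo)/(Hi) stand in for concrete immediates; see mergeConstOffsetInsn below
// for the exact split of the offset.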
static unsigned getPostIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no post-indexed wise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STRSpost;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STRDpost;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STRQpost;
  case AArch64::STRBBui:
    return AArch64::STRBBpost;
  case AArch64::STRHHui:
    return AArch64::STRHHpost;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STRWpost;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STRXpost;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDRSpost;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDRDpost;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDRQpost;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpost;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpost;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDRWpost;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDRXpost;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpost;
  case AArch64::LDPSi:
    return AArch64::LDPSpost;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpost;
  case AArch64::LDPDi:
    return AArch64::LDPDpost;
  case AArch64::LDPQi:
    return AArch64::LDPQpost;
  case AArch64::LDPWi:
    return AArch64::LDPWpost;
  case AArch64::LDPXi:
    return AArch64::LDPXpost;
  case AArch64::STPSi:
    return AArch64::STPSpost;
  case AArch64::STPDi:
    return AArch64::STPDpost;
  case AArch64::STPQi:
    return AArch64::STPQpost;
  case AArch64::STPWi:
    return AArch64::STPWpost;
  case AArch64::STPXi:
    return AArch64::STPXpost;
  case AArch64::STGi:
    return AArch64::STGPostIndex;
  case AArch64::STZGi:
    return AArch64::STZGPostIndex;
  case AArch64::ST2Gi:
    return AArch64::ST2GPostIndex;
  case AArch64::STZ2Gi:
    return AArch64::STZ2GPostIndex;
  case AArch64::STGPi:
    return AArch64::STGPpost;
  }
}
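// Illustrative example (not from the original source): an increment after
// the access folds into a post-indexed (writeback) form:
//
//   str x0, [x1]
//   add x1, x1, #8      =>    str x0, [x1], #8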
// Returns whether FirstMI (a pre-indexed load/store) and MI (a plain
// load/store of the same width) can be paired.
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {
  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  switch (OpcA) {
  default:
    return false;
  case AArch64::STRSpre:
    return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
  case AArch64::STRDpre:
    return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
  case AArch64::STRQpre:
    return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
  case AArch64::STRWpre:
    return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
  case AArch64::STRXpre:
    return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
  case AArch64::LDRSpre:
    return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
  case AArch64::LDRDpre:
    return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
  case AArch64::LDRQpre:
    return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
  case AArch64::LDRWpre:
    return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
  case AArch64::LDRXpre:
    return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
  case AArch64::LDRSWpre:
    return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);
  }
}
// Returns the scale and offset range of pre/post indexed variants of MI.
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
                                       int &MinOffset, int &MaxOffset) {
  bool IsPaired = AArch64InstrInfo::isPairedLdSt(MI);
  bool IsTagStore = isTagStore(MI);
  // ST*G and all paired ldst have the same scale in pre/post-indexed variants
  // as in the "unsigned offset" variant. All other pre/post indexed ldst
  // instructions are unscaled.
  Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;

  if (IsPaired) {
    MinOffset = -64;
    MaxOffset = 63;
  } else {
    MinOffset = -256;
    MaxOffset = 255;
  }
}

static MachineOperand &getLdStRegOp(MachineInstr &MI,
                                    unsigned PairedRegOp = 0) {
  assert(PairedRegOp < 2 && "Unexpected register operand idx.");
  bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
  if (IsPreLdSt)
    PairedRegOp += 1;
  unsigned Idx =
      AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp + 1 : 0;
  return MI.getOperand(Idx);
}
// Returns whether the load's address range lies entirely within the store's.
static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
                                  MachineInstr &StoreInst,
                                  const AArch64InstrInfo *TII) {
  assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
  int LoadSize = TII->getMemScale(LoadInst);
  int StoreSize = TII->getMemScale(StoreInst);
  int UnscaledStOffset =
      TII->hasUnscaledLdStOffset(StoreInst)
          ? AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm()
          : AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm() * StoreSize;
  int UnscaledLdOffset =
      TII->hasUnscaledLdStOffset(LoadInst)
          ? AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm()
          : AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm() * LoadSize;
  return (UnscaledStOffset <= UnscaledLdOffset) &&
         (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
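// Illustrative check (not from the original source): for "str w0, [x1, #4]"
// covering bytes [4, 8) followed by "ldrh w2, [x1, #6]" covering [6, 8),
// both inequalities hold (4 <= 6 and 6 + 2 <= 4 + 4), so the load's bytes
// are entirely contained in the store and can be forwarded from it.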
static bool isPromotableZeroStoreInst(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
          isNarrowStore(Opc)) &&
         getLdStRegOp(MI).getReg() == AArch64::WZR;
}

static bool isPromotableLoadFromStore(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  // Unscaled instructions.
  case AArch64::LDURBBi:
  case AArch64::LDURHHi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
    return true;
  }
}
static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::STRHHui:
  case AArch64::STRBBui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
  case AArch64::LDRHHui:
  case AArch64::LDRBBui:
  case AArch64::STGi:
  case AArch64::STZGi:
  case AArch64::ST2Gi:
  case AArch64::STZ2Gi:
  case AArch64::STGPi:
  // Unscaled instructions.
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  // Paired instructions.
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:
  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
  case AArch64::STPWi:
  case AArch64::STPXi:
    // Make sure this is a reg+imm (as opposed to an address reloc).
    if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
      return false;
    // ... (additional stack-tagging checks elided)
    return true;
  }
}
// Returns whether MI is a register-offset load whose index can be folded,
// and sets Scale to the access size in bytes.
static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  case AArch64::LDRBroX:
  case AArch64::LDRBBroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSBWroX:
    Scale = 1;
    return true;
  case AArch64::LDRHroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSHWroX:
    Scale = 2;
    return true;
  case AArch64::LDRWroX:
  case AArch64::LDRSroX:
  case AArch64::LDRSWroX:
    Scale = 4;
    return true;
  case AArch64::LDRDroX:
  case AArch64::LDRXroX:
    Scale = 8;
    return true;
  case AArch64::LDRQroX:
    Scale = 16;
    return true;
  }
}

static bool isRewritableImplicitDef(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::ORRWrs:
  case AArch64::ADDWri:
    return true;
  }
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                                           MachineBasicBlock::iterator MergeMI,
                                           const LdStPairFlags &Flags) {
  assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
         "Expected promotable zero stores.");

  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further, since merging will invalidate the iterator.
  if (NextI == MergeMI)
    NextI = next_nodbg(NextI, E);

  unsigned Opc = I->getOpcode();
  unsigned MergeMIOpc = MergeMI->getOpcode();
  bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
  bool IsMergedMIScaled = !TII->hasUnscaledLdStOffset(MergeMIOpc);
  int OffsetStride = IsScaled ? TII->getMemScale(*I) : 1;
  int MergeMIOffsetStride = IsMergedMIScaled ? TII->getMemScale(*MergeMI) : 1;

  bool MergeForward = Flags.getMergeForward();
  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
  // Also based on MergeForward is from where we copy the base register operand
  // so we get the flags compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI)
                   : AArch64InstrInfo::getLdStBaseOp(*I);

  // The lower of the two offsets becomes the offset of the wider store.
  int64_t IOffsetInBytes =
      AArch64InstrInfo::getLdStOffsetOp(*I).getImm() * OffsetStride;
  int64_t MIOffsetInBytes =
      AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() *
      MergeMIOffsetStride;
  int64_t OffsetImm;
  if (IOffsetInBytes > MIOffsetInBytes)
    OffsetImm = MIOffsetInBytes;
  else
    OffsetImm = IOffsetInBytes;

  unsigned NewOpcode = getMatchingWideOpcode(Opc);
  // Adjust the final offset if the result opcode is a scaled store.
  if (!TII->hasUnscaledLdStOffset(NewOpcode)) {
    int NewOffsetStride = TII->getMemScale(NewOpcode);
    assert(((OffsetImm % NewOffsetStride) == 0) &&
           "Offset should be a multiple of the store memory scale");
    OffsetImm = OffsetImm / NewOffsetStride;
  }

  // Construct the new instruction.
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineInstrBuilder MIB;
  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(NewOpcode))
            .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
            .add(BaseRegOp)
            .addImm(OffsetImm)
            .cloneMergedMemRefs({&*I, &*MergeMI})
            .setMIFlags(I->mergeFlagsWith(*MergeMI));
  (void)MIB;

  LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n    ");
  LLVM_DEBUG(I->print(dbgs()));
  LLVM_DEBUG(MergeMI->print(dbgs()));

  // Erase the old instructions.
  I->eraseFromParent();
  MergeMI->eraseFromParent();
  return NextI;
}
// Apply Fn to all instructions starting from MI, up to and including the
// first one that defines DefReg, or until Limit instructions have been seen.
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
                              const TargetRegisterInfo *TRI, unsigned Limit,
                              std::function<bool(MachineInstr &, bool)> &Fn) {
  auto MBB = MI.getParent();
  for (MachineInstr &FirstMI :
       instructionsWithoutDebug(MI.getIterator(), MBB->instr_end())) {
    if (!Limit)
      return false;
    --Limit;

    bool isDef = any_of(FirstMI.operands(), [&](MachineOperand &MOP) {
      return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
             TRI->regsOverlap(MOP.getReg(), DefReg);
    });
    if (!Fn(FirstMI, isDef))
      return false;
    if (isDef)
      break;
  }
  return true;
}

static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
                                   const TargetRegisterInfo *TRI) {
  // First drop killed uses, then add defined registers, so a register that is
  // both killed and redefined by MI remains live.
  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && MOP.isKill())
      Units.removeReg(MOP.getReg());

  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && !MOP.isKill())
      Units.addReg(MOP.getReg());
}

/// This function will add a new entry into the debugValueSubstitutions table
/// when two instructions have been merged into a new one represented by
/// \p MergedInstr.
static void addDebugSubstitutionsToTable(MachineFunction *MF,
                                         unsigned InstrNumToSet,
                                         MachineInstr &OriginalInstr,
                                         MachineInstr &MergedInstr) {
  // Figure out the operand index of the destination register of OriginalInstr
  // in the new MergedInstr.
  auto Reg = OriginalInstr.getOperand(0).getReg();
  unsigned OperandNo = 0;
  bool RegFound = false;
  for (const auto Op : MergedInstr.operands()) {
    if (Op.getReg() == Reg) {
      RegFound = true;
      break;
    }
    OperandNo++;
  }

  if (RegFound)
    MF->makeDebugValueSubstitution({OriginalInstr.getDebugInstrNum(), 0},
                                   {InstrNumToSet, OperandNo});
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator Paired,
                                      const LdStPairFlags &Flags) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further, since merging will invalidate the iterator.
  if (NextI == Paired)
    NextI = next_nodbg(NextI, E);

  int SExtIdx = Flags.getSExtIdx();
  unsigned Opc =
      SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
  bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
  int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;

  bool MergeForward = Flags.getMergeForward();

  std::optional<MCPhysReg> RenameReg = Flags.getRenameReg();
  if (RenameReg) {
    MCRegister RegToRename = getLdStRegOp(*I).getReg();
    DefinedInBB.addReg(*RenameReg);

    // Return the sub/super register of *RenameReg contained in the given
    // register class.
    auto GetMatchingSubReg =
        [this, RenameReg](const TargetRegisterClass *C) -> MCPhysReg {
      for (MCPhysReg SubOrSuper : TRI->sub_and_superregs_inclusive(*RenameReg))
        if (C->contains(SubOrSuper))
          return SubOrSuper;
      llvm_unreachable("Should have found matching sub or super register!");
    };

    std::function<bool(MachineInstr &, bool)> UpdateMIs =
        [this, RegToRename, GetMatchingSubReg, MergeForward](MachineInstr &MI,
                                                             bool IsDef) {
          if (IsDef) {
            bool SeenDef = false;
            for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
              MachineOperand &MOP = MI.getOperand(OpIdx);
              // Rename the first explicit definition and all implicit
              // definitions matching RegToRename.
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  (!MergeForward || !SeenDef ||
                   (MOP.isDef() && MOP.isImplicit())) &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                Register MatchingReg;
                if (const TargetRegisterClass *RC =
                        MI.getRegClassConstraint(OpIdx, TII, TRI))
                  MatchingReg = GetMatchingSubReg(RC);
                else {
                  if (!isRewritableImplicitDef(MI.getOpcode()))
                    continue;
                  MatchingReg = GetMatchingSubReg(
                      TRI->getMinimalPhysRegClass(MOP.getReg()));
                }
                MOP.setReg(MatchingReg);
                SeenDef = true;
              }
            }
          } else {
            for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
              MachineOperand &MOP = MI.getOperand(OpIdx);
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                Register MatchingReg;
                if (const TargetRegisterClass *RC =
                        MI.getRegClassConstraint(OpIdx, TII, TRI))
                  MatchingReg = GetMatchingSubReg(RC);
                else
                  MatchingReg = GetMatchingSubReg(
                      TRI->getMinimalPhysRegClass(MOP.getReg()));
                assert(MatchingReg != AArch64::NoRegister &&
                       "Cannot find matching regs for renaming");
                MOP.setReg(MatchingReg);
              }
            }
          }
          return true;
        };
    forAllMIsUntilDef(MergeForward ? *I : *std::prev(Paired), RegToRename, TRI,
                      UINT32_MAX, UpdateMIs);

#ifndef NDEBUG
    // For forward merging stores, make sure the register used for renaming is
    // not used between the paired instructions; for backward merging loads,
    // make the same check for the register being renamed. Either would trash
    // the contents around the new paired instruction.
    MCPhysReg RegToCheck = *RenameReg;
    if (!MergeForward)
      RegToCheck = RegToRename;
    for (auto &MI : make_range(MergeForward ? std::next(I) : I,
                               MergeForward ? std::next(Paired) : Paired))
      assert(all_of(MI.operands(),
                    [this, RegToCheck](const MachineOperand &MOP) {
                      return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
                             MOP.isUndef() ||
                             !TRI->regsOverlap(MOP.getReg(), RegToCheck);
                    }) &&
             "Rename register used between paired instruction, trashing the "
             "content");
#endif
  }

  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
  // Also based on MergeForward is from where we copy the base register operand
  // so we get the flags compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired)
                   : AArch64InstrInfo::getLdStBaseOp(*I);

  int Offset = AArch64InstrInfo::getLdStOffsetOp(*I).getImm();
  int PairedOffset = AArch64InstrInfo::getLdStOffsetOp(*Paired).getImm();
  bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
  if (IsUnscaled != PairedIsUnscaled) {
    // We're trying to pair instructions that differ in how they are scaled.
    // If I is scaled then scale the offset of Paired accordingly. Otherwise,
    // do the opposite (i.e., make Paired's offset unscaled).
    int MemSize = TII->getMemScale(*Paired);
    if (PairedIsUnscaled) {
      assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
             "Offset should be a multiple of the stride!");
      PairedOffset /= MemSize;
    } else {
      PairedOffset *= MemSize;
    }
  }

  // Which register is Rt and which is Rt2 depends on the offset order.
  MachineInstr *RtMI, *Rt2MI;
  if (Offset == PairedOffset + OffsetStride &&
      !AArch64InstrInfo::isPreLdSt(*I)) {
    RtMI = &*Paired;
    Rt2MI = &*I;
    // We swapped the assumption made for SExtIdx (ldp I, Paired becomes
    // ldp Paired, I), so update the index accordingly.
    if (SExtIdx != -1)
      SExtIdx = (SExtIdx + 1) % 2;
  } else {
    RtMI = &*I;
    Rt2MI = &*Paired;
  }
  int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
  // Scale the immediate offset, if necessary.
  if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
    assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
           "Unscaled offset cannot be scaled.");
    OffsetImm /= TII->getMemScale(*RtMI);
  }

  // Construct the new instruction.
  MachineInstrBuilder MIB;
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineOperand RegOp0 = getLdStRegOp(*RtMI);
  MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
  MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;
  // Kill flags may become invalid when moving stores for pairing.
  if (RegOp0.isUse()) {
    if (!MergeForward) {
      // Clear kill flags on store if moving upwards.
      RegOp0.setIsKill(false);
      RegOp1.setIsKill(false);
    } else {
      // Drop the paired register's kill flag if its value is still read
      // between the two stores.
      for (auto It = std::next(I); It != Paired && PairedRegOp.isKill(); ++It)
        if (It->readsRegister(PairedRegOp.getReg(), TRI))
          PairedRegOp.setIsKill(false);

      // Clear kill flags of the first store's register in between.
      Register Reg = getLdStRegOp(*I).getReg();
      for (MachineInstr &MI :
           make_range(std::next(I->getIterator()), Paired->getIterator()))
        MI.clearRegisterKills(Reg, TRI);
    }
  }

  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc)))
            .add(RegOp0)
            .add(RegOp1)
            .add(BaseRegOp)
            .addImm(OffsetImm)
            .cloneMergedMemRefs({&*I, &*Paired})
            .setMIFlags(I->mergeFlagsWith(*Paired));

  LLVM_DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n ");
  LLVM_DEBUG(I->print(dbgs()));
  LLVM_DEBUG(Paired->print(dbgs()));

  if (SExtIdx != -1) {
    // Generate the sign extension for the proper result of the ldp.
    // I.e., with X1, that would be:
    //   %w1 = KILL %w1, implicit-def %x1
    //   %x1 = SBFMXri killed %x1, 0, 31
    MachineOperand &DstMO = MIB->getOperand(SExtIdx);
    // Right now, DstMO has the extended register, since it comes from an
    // extended opcode.
    Register DstRegX = DstMO.getReg();
    // Get the W variant of that register.
    Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
    // Update the result of LDP to use the W instead of the X variant.
    DstMO.setReg(DstRegW);
    // Make the machine verifier happy by providing a definition for
    // the X register.
    MachineInstrBuilder MIBKill =
        BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
            .addReg(DstRegW)
            .addReg(DstRegX, RegState::Define);
    MIBKill->getOperand(2).setImplicit();
    // Create the sign extension.
    MachineInstrBuilder MIBSXTW =
        BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
            .addReg(DstRegX)
            .addImm(0)
            .addImm(31);
    (void)MIBSXTW;

    // If either original instruction carried a debug instruction number,
    // point its substitution at whichever new instruction defines its value.
    if (I->peekDebugInstrNum()) {
      unsigned NewInstrNum;
      if (DstRegX == I->getOperand(0).getReg()) {
        NewInstrNum = MIBSXTW->getDebugInstrNum();
        addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *I,
                                     *MIBSXTW);
      } else {
        NewInstrNum = MIB->getDebugInstrNum();
        addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *I, *MIB);
      }
    }
    if (Paired->peekDebugInstrNum()) {
      unsigned NewInstrNum;
      if (DstRegX == Paired->getOperand(0).getReg()) {
        NewInstrNum = MIBSXTW->getDebugInstrNum();
        addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *Paired,
                                     *MIBSXTW);
      } else {
        NewInstrNum = MIB->getDebugInstrNum();
        addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *Paired,
                                     *MIB);
      }
    }
  } else if (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) {
    // We are combining SVE fill/spill into LDP/STP, so we need to use the Q
    // variant of the registers.
    MachineOperand &MOp0 = MIB->getOperand(0);
    MachineOperand &MOp1 = MIB->getOperand(1);
    assert(AArch64::ZPRRegClass.contains(MOp0.getReg()) &&
           AArch64::ZPRRegClass.contains(MOp1.getReg()) &&
           "Invalid register.");
    MOp0.setReg(AArch64::Q0 + (MOp0.getReg() - AArch64::Z0));
    MOp1.setReg(AArch64::Q0 + (MOp1.getReg() - AArch64::Z0));
  } else {
    // Keep any debug value substitutions pointing at the merged instruction.
    if (I->peekDebugInstrNum()) {
      unsigned NewInstrNum = MIB->getDebugInstrNum();
      addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *I, *MIB);
    }
    if (Paired->peekDebugInstrNum()) {
      unsigned NewInstrNum = MIB->getDebugInstrNum();
      addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *Paired,
                                   *MIB);
    }
  }

  // Copy implicit defs from the instructions being combined to the newly
  // created instruction.
  auto CopyImplicitOps = [&MIB](MachineBasicBlock::iterator MI1,
                                MachineBasicBlock::iterator MI2) {
    SmallSetVector<Register, 4> Ops;
    for (const MachineOperand &MO :
         llvm::drop_begin(MI1->operands(), MI1->getDesc().getNumOperands()))
      if (MO.isReg() && MO.isImplicit() && MO.isDef())
        Ops.insert(MO.getReg());
    for (const MachineOperand &MO :
         llvm::drop_begin(MI2->operands(), MI2->getDesc().getNumOperands()))
      if (MO.isReg() && MO.isImplicit() && MO.isDef())
        Ops.insert(MO.getReg());
    for (Register Reg : Ops)
      MIB.addDef(Reg, RegState::Implicit);
  };
  CopyImplicitOps(I, Paired);

  // Erase the old instructions.
  I->eraseFromParent();
  Paired->eraseFromParent();

  return NextI;
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                                          MachineBasicBlock::iterator StoreI) {
  MachineBasicBlock::iterator NextI =
      next_nodbg(LoadI, LoadI->getParent()->end());

  int LoadSize = TII->getMemScale(*LoadI);
  int StoreSize = TII->getMemScale(*StoreI);
  Register LdRt = getLdStRegOp(*LoadI).getReg();
  const MachineOperand &StMO = getLdStRegOp(*StoreI);
  Register StRt = getLdStRegOp(*StoreI).getReg();
  bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);

  assert((IsStoreXReg ||
          TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
         "Unexpected RegClass");

  MachineInstr *BitExtMI;
  if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
    // Remove the load if its destination register is the same register as
    // the stored value.
    if (StRt == LdRt && LoadSize == 8) {
      for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                         LoadI->getIterator())) {
        if (MI.killsRegister(StRt, TRI)) {
          MI.clearRegisterKills(StRt, TRI);
          break;
        }
      }
      LLVM_DEBUG(dbgs() << "Remove load instruction:\n    ");
      LLVM_DEBUG(LoadI->print(dbgs()));
      LoadI->eraseFromParent();
      return NextI;
    }
    // Replace the load with a mov if load and store have the same size.
    BitExtMI =
        BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
            .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
            .add(StMO)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .setMIFlags(LoadI->getFlags());
  } else {
    // The load reads part of a wider store; replace it with a bitfield
    // extract of the stored value.
    bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
    assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
           "Unsupported ld/st match");
    assert(LoadSize <= StoreSize && "Invalid load size");
    int UnscaledLdOffset =
        IsUnscaled
            ? AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm()
            : AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() * LoadSize;
    int UnscaledStOffset =
        IsUnscaled
            ? AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm()
            : AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() * StoreSize;
    int Width = LoadSize * 8;
    Register DestReg =
        IsStoreXReg ? Register(TRI->getMatchingSuperReg(
                          LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
                    : LdRt;

    assert((UnscaledLdOffset >= UnscaledStOffset &&
            (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
           "Invalid offset");

    int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
    int Imms = Immr + Width - 1;
    if (UnscaledLdOffset == UnscaledStOffset) {
      uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
                                | ((Immr) << 6)               // immr
                                | ((Imms) << 0);              // imms
      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
                  DestReg)
              .add(StMO)
              .addImm(AndMaskEncoded)
              .setMIFlags(LoadI->getFlags());
    } else if (IsStoreXReg && Imms == 31) {
      // Use the 32-bit variant of UBFM, which is the LSR alias here.
      assert(Immr <= Imms && "Expected LSR alias of UBFM");
      BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                         TII->get(AArch64::UBFMWri),
                         TRI->getSubReg(DestReg, AArch64::sub_32))
                     .addReg(TRI->getSubReg(StRt, AArch64::sub_32))
                     .addImm(Immr)
                     .addImm(Imms)
                     .setMIFlags(LoadI->getFlags());
    } else {
      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
                  DestReg)
              .add(StMO)
              .addImm(Immr)
              .addImm(Imms)
              .setMIFlags(LoadI->getFlags());
    }
  }

  // Clear kill flags between store and load.
  for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                     BitExtMI->getIterator()))
    if (MI.killsRegister(StRt, TRI)) {
      MI.clearRegisterKills(StRt, TRI);
      break;
    }

  LLVM_DEBUG(dbgs() << "Promoting load by replacing:\n    ");
  LLVM_DEBUG(LoadI->print(dbgs()));
  LoadI->eraseFromParent();
  return NextI;
}
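// Illustrative example (not from the original source): a narrow load reading
// from the middle of a wider store becomes a shift of the stored value:
//
//   str  w1, [x0, #4]           str w1, [x0, #4]
//   ldrh w2, [x0, #6]     =>    lsr w2, w1, #16
//
// Here Immr = 8 * (6 - 4) = 16 and Imms = 16 + 16 - 1 = 31, so the UBFMWri
// built above is exactly the "lsr #16" alias.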
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  if (IsUnscaled) {
    // If the byte-offset isn't a multiple of the stride, there's no point in
    // trying to match it.
    if (Offset % OffsetStride)
      return false;
    Offset /= OffsetStride;
  }
  // Paired instructions have a 7-bit signed offset field.
  return Offset <= 63 && Offset >= -64;
}

// Alignment helper, specialized to powers of 2 and signed ints, avoiding a
// cast from uint64_t to int when using alignTo from MathExtras.h.
static int alignTo(int Num, int PowOf2) {
  return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}
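// For example, alignTo(13, 8) = (13 + 7) & ~7 = 16, and alignTo(16, 8) = 16.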
bool AArch64LoadStoreOpt::findMatchingStore(
    MachineBasicBlock::iterator I, unsigned Limit,
    MachineBasicBlock::iterator &StoreI) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator MBBI = I;
  MachineInstr &LoadMI = *I;
  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(LoadMI).getReg();

  // If the load is the first instruction in the block, there's obviously
  // not any matching store.
  if (MBBI == B)
    return false;

  // Track which register units have been modified and used between the first
  // insn and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  unsigned Count = 0;
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // If the load reads directly from the address to which the store writes
    // and the stored value is not modified, we can promote the load.
    if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
        BaseReg == AArch64InstrInfo::getLdStBaseOp(MI).getReg() &&
        isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
        ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
      StoreI = MBBI;
      return true;
    }

    if (MI.isCall())
      return false;

    // Update modified / used register units.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (!ModifiedRegUnits.available(BaseReg))
      return false;

    // If we encounter a store aliased with the load, return early.
    if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA=*/false))
      return false;
  } while (MBBI != B && Count < Limit);
  return false;
}
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
                                       LdStPairFlags &Flags,
                                       const AArch64InstrInfo *TII) {
  // If this is volatile or if pairing is suppressed, not a candidate.
  if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
    return false;

  // We should have already checked FirstMI for pair suppression and
  // volatility.
  assert(!FirstMI.hasOrderedMemoryRef() &&
         !TII->isLdStPairSuppressed(FirstMI) &&
         "FirstMI shouldn't get here if either of these checks are true.");

  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  // Opcodes match: if the opcodes are pre ld/st there is nothing more to
  // check.
  if (OpcA == OpcB)
    return !AArch64InstrInfo::isPreLdSt(FirstMI);

  // Bail out if one of the opcodes is SVE fill/spill, as we currently don't
  // allow pairing them with other instructions.
  if (OpcA == AArch64::LDR_ZXI || OpcA == AArch64::STR_ZXI ||
      OpcB == AArch64::LDR_ZXI || OpcB == AArch64::STR_ZXI)
    return false;

  // Two pre ld/st of different opcodes cannot be merged either.
  if (AArch64InstrInfo::isPreLdSt(FirstMI) && AArch64InstrInfo::isPreLdSt(MI))
    return false;

  // Try to match a sign-extended load/store with a zero-extended load/store.
  bool IsValidLdStrOpc, PairIsValidLdStrOpc;
  unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
  assert(IsValidLdStrOpc &&
         "Given Opc should be a Load or Store with an immediate");
  // OpcA will be the first instruction in the pair.
  if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
    Flags.setSExtIdx(NonSExtOpc == OpcA ? 1 : 0);
    return true;
  }

  // If the second instruction isn't even a mergable/pairable load/store, bail
  // out.
  if (!PairIsValidLdStrOpc)
    return false;

  // Narrow stores do not have matching pair opcodes, so constrain their
  // merging to zero stores.
  if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
    return getLdStRegOp(FirstMI).getReg() == AArch64::WZR &&
           getLdStRegOp(MI).getReg() == AArch64::WZR &&
           TII->getMemScale(FirstMI) == TII->getMemScale(MI);

  // The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and
  // LDR<S,D,Q,W,X,SW>pre - LDR<S,D,Q,W,X,SW>ui
  // are candidate pairs that can be merged.
  if (isPreLdStPairCandidate(FirstMI, MI))
    return true;

  // Try to match an unscaled load/store with a scaled load/store.
  return TII->hasUnscaledLdStOffset(OpcA) !=
             TII->hasUnscaledLdStOffset(OpcB) &&
         getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB);
}
static bool canRenameMOP(const MachineOperand &MOP,
                         const TargetRegisterInfo *TRI) {
  if (MOP.isReg()) {
    auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
    // Renaming registers with multiple disjunct sub-registers (e.g. the
    // result of a LD3) means that all sub-registers are renamed, potentially
    // impacting other instructions we did not check. Bail out.
    // Note that this relies on the structure of the AArch64 register file: a
    // subregister cannot be written without overwriting the whole register.
    if (RegClass->HasDisjunctSubRegs && RegClass->CoveredBySubRegs &&
        (TRI->getSubRegisterClass(RegClass, AArch64::dsub0) ||
         TRI->getSubRegisterClass(RegClass, AArch64::qsub0) ||
         TRI->getSubRegisterClass(RegClass, AArch64::zsub0))) {
      LLVM_DEBUG(
          dbgs()
          << "  Cannot rename operands with multiple disjunct subregisters ("
          << MOP << ")\n");
      return false;
    }

    // We cannot rename arbitrary implicit-defs; the specific rule to rewrite
    // them must be known, e.g. for ORRWrs the implicit-def corresponds to the
    // result register.
    if (MOP.isImplicit() && MOP.isDef()) {
      if (!isRewritableImplicitDef(MOP.getParent()->getOpcode()))
        return false;
      return TRI->isSuperOrSubRegisterEq(
          MOP.getParent()->getOperand(0).getReg(), MOP.getReg());
    }
  }
  return MOP.isImplicit() ||
         (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
}
// Check if the regiser of FirstMI (a store) can be renamed in all
// instructions from FirstMI up to the previous definition of that register.
static bool canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
                             SmallPtrSetImpl<const TargetRegisterClass *>
                                 &RequiredClasses,
                             const TargetRegisterInfo *TRI) {
  if (!FirstMI.mayStore())
    return false;

  auto RegToRename = getLdStRegOp(FirstMI).getReg();
  // For now, we only rename if the store operand gets killed at the store.
  if (!getLdStRegOp(FirstMI).isKill() &&
      !any_of(FirstMI.operands(),
              [TRI, RegToRename](const MachineOperand &MOP) {
                return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                       MOP.isImplicit() && MOP.isKill() &&
                       TRI->regsOverlap(RegToRename, MOP.getReg());
              })) {
    LLVM_DEBUG(dbgs() << "  Operand not killed at " << FirstMI);
    return false;
  }

  bool FoundDef = false;

  // For each instruction between FirstMI and the previous def of RegToRename,
  // check that RegToRename can be renamed there and collect the register
  // classes it must satisfy.
  std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
                                                           bool IsDef) {
    FoundDef = IsDef;
    if (FoundDef) {
      // For some pseudo instructions we might not generate code in the end
      // (e.g. KILL) and we would end up without a correct def for the rename
      // register.
      if (MI.isPseudo()) {
        LLVM_DEBUG(dbgs() << "  Cannot rename pseudo/bundle instruction\n");
        return false;
      }

      for (auto &MOP : MI.operands()) {
        if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||
            !TRI->regsOverlap(MOP.getReg(), RegToRename))
          continue;
        if (!canRenameMOP(MOP, TRI)) {
          LLVM_DEBUG(dbgs() << "  Cannot rename " << MOP << " in " << MI);
          return false;
        }
        RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
      }
      return true;
    } else {
      for (auto &MOP : MI.operands()) {
        if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
            !TRI->regsOverlap(MOP.getReg(), RegToRename))
          continue;
        if (!canRenameMOP(MOP, TRI)) {
          LLVM_DEBUG(dbgs() << "  Cannot rename " << MOP << " in " << MI);
          return false;
        }
        RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
      }
    }
    return true;
  };

  if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
    return false;

  if (!FoundDef) {
    LLVM_DEBUG(dbgs() << "  Did not find definition for register in BB\n");
    return false;
  }
  return true;
}
// Check if we can rename the register of FirstLoad in all instructions
// between FirstLoad (inclusive) and SecondLoad (exclusive).
static bool canRenameUntilSecondLoad(
    MachineInstr &FirstLoad, MachineInstr &SecondLoad,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  if (FirstLoad.isPseudo())
    return false;

  UsedInBetween.accumulate(FirstLoad);
  auto RegToRename = getLdStRegOp(FirstLoad).getReg();
  bool Success = std::all_of(
      FirstLoad.getIterator(), SecondLoad.getIterator(),
      [&](MachineInstr &MI) {
        LLVM_DEBUG(dbgs() << "Checking " << MI);
        // Currently we do not try to rename across frame-setup instructions.
        if (MI.getFlag(MachineInstr::FrameSetup)) {
          LLVM_DEBUG(dbgs() << "  Cannot rename framesetup instructions "
                            << "currently\n");
          return false;
        }

        for (auto &MOP : MI.operands()) {
          if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
              !TRI->regsOverlap(MOP.getReg(), RegToRename))
            continue;
          if (!canRenameMOP(MOP, TRI)) {
            LLVM_DEBUG(dbgs() << "  Cannot rename " << MOP << " in " << MI);
            return false;
          }
          RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
        }

        return true;
      });
  return Success;
}
static std::optional<MCPhysReg> tryToFindRegisterToRename(
    const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  const MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Checks if any sub- or super-register of PR is callee saved.
  auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
    return any_of(TRI->sub_and_superregs_inclusive(PR),
                  [&MF, TRI](MCPhysReg SubOrSuper) {
                    return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
                  });
  };

  // Check if PR or one of its sub- or super-registers can be used for all
  // required register classes.
  auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
    return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
      return any_of(
          TRI->sub_and_superregs_inclusive(PR),
          [C](MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); });
    });
  };

  auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
  for (const MCPhysReg &PR : *RegClass) {
    if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
        !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
        CanBeUsedForAllClasses(PR)) {
      DefinedInBB.addReg(PR);
      LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
                        << "\n");
      return {PR};
    }
  }
  LLVM_DEBUG(dbgs() << "No rename register found from "
                    << TRI->getRegClassName(RegClass) << "\n");
  return std::nullopt;
}
// For a load/store pair using the same register, try to find a register that
// the second instruction's register can be renamed to.
static std::optional<MCPhysReg> findRenameRegForSameLdStRegPair(
    std::optional<bool> MaybeCanRename, MachineInstr &FirstMI,
    MachineInstr &MI, Register Reg, LiveRegUnits &DefinedInBB,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  std::optional<MCPhysReg> RenameReg;
  if (!DebugCounter::shouldExecute(RegRenamingCounter))
    return RenameReg;

  auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
  MachineFunction &MF = *FirstMI.getParent()->getParent();
  if (!RegClass || !MF.getRegInfo().tracksLiveness())
    return RenameReg;

  const bool IsLoad = FirstMI.mayLoad();

  if (!MaybeCanRename) {
    if (IsLoad)
      MaybeCanRename = {canRenameUntilSecondLoad(FirstMI, MI, UsedInBetween,
                                                 RequiredClasses, TRI)};
    else
      MaybeCanRename = {
          canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)};
  }

  if (*MaybeCanRename) {
    RenameReg = tryToFindRegisterToRename(MF, Reg, DefinedInBB, UsedInBetween,
                                          RequiredClasses, TRI);
  }
  return RenameReg;
}
/// Scan the instructions looking for a load/store that can be combined with
/// the current instruction into a wider load/store pair.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
                                      LdStPairFlags &Flags, unsigned Limit,
                                      bool FindNarrowMerge) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator MBBI = I;
  MachineBasicBlock::iterator MBBIWithRenameReg;
  MachineInstr &FirstMI = *I;
  MBBI = next_nodbg(MBBI, E);

  bool MayLoad = FirstMI.mayLoad();
  bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
  Register Reg = getLdStRegOp(FirstMI).getReg();
  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(FirstMI).getReg();
  int Offset = AArch64InstrInfo::getLdStOffsetOp(FirstMI).getImm();
  int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
  bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);

  std::optional<bool> MaybeCanRename;
  if (!EnableRenaming)
    MaybeCanRename = {false};

  SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
  LiveRegUnits UsedInBetween;
  UsedInBetween.init(*TRI);

  Flags.clearRenameReg();

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  // Remember any instructions that read/write memory between FirstMI and MI.
  SmallVector<MachineInstr *, 4> MemInsns;

  for (unsigned Count = 0; MBBI != E && Count < Limit;
       MBBI = next_nodbg(MBBI, E)) {
    MachineInstr &MI = *MBBI;

    UsedInBetween.accumulate(MI);

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    Flags.setSExtIdx(-1);
    if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
        AArch64InstrInfo::getLdStOffsetOp(MI).isImm()) {
      assert(MI.mayLoadOrStore() && "Expected memory operation.");
      // We've found an instruction with a compatible opcode; check if the
      // base and offset are compatible as well.
      Register MIBaseReg = AArch64InstrInfo::getLdStBaseOp(MI).getReg();
      int MIOffset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
      bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
      if (IsUnscaled != MIIsUnscaled) {
        // The candidates differ in how they are scaled; bring MI's offset to
        // FirstMI's scaling.
        int MemSize = TII->getMemScale(MI);
        if (MIIsUnscaled) {
          // If the unscaled offset isn't a multiple of the MemSize, we can't
          // pair the operations together: bail and keep looking.
          if (MIOffset % MemSize) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
          MIOffset /= MemSize;
        } else {
          MIOffset *= MemSize;
        }
      }

      bool IsPreLdSt = isPreLdStPairCandidate(FirstMI, MI);

      if (BaseReg == MIBaseReg) {
        // If the offset of the second ld/st is not equal to the size of the
        // destination register it can't be paired with a pre-index ld/st
        // pair. Additionally if the base reg is used or modified the
        // operations can't be paired: bail and keep looking.
        if (IsPreLdSt) {
          bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
          bool IsBaseRegUsed = !UsedRegUnits.available(
              AArch64InstrInfo::getLdStBaseOp(MI).getReg());
          bool IsBaseRegModified = !ModifiedRegUnits.available(
              AArch64InstrInfo::getLdStBaseOp(MI).getReg());
          // If the stored value and the address of the second instruction are
          // the same, the second one must use the updated register, so the
          // fold is invalid.
          bool IsMIRegTheSame =
              TRI->regsOverlap(getLdStRegOp(MI).getReg(),
                               AArch64InstrInfo::getLdStBaseOp(MI).getReg());
          if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
              IsMIRegTheSame) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        } else {
          if ((Offset != MIOffset + OffsetStride) &&
              (Offset + OffsetStride != MIOffset)) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        }

        int MinOffset = Offset < MIOffset ? Offset : MIOffset;
        if (FindNarrowMerge) {
          // If the alignment requirements of the scaled wide load/store
          // instruction can't express the offset of the scaled narrow input,
          // bail and keep looking. For promotable zero stores, allow only when
          // the stored value is the same (i.e., WZR).
          if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
              (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        } else {
          // Pairwise instructions have a 7-bit signed offset field. If the
          // resultant immediate offset of merging these instructions is out
          // of range, bail and keep looking.
          if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, "
                              << "keep looking.\n");
            continue;
          }
          // If the alignment requirements of the paired (scaled) instruction
          // can't express the offset of the unscaled input, bail and keep
          // looking.
          if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            LLVM_DEBUG(dbgs()
                       << "Offset doesn't fit due to alignment requirements, "
                       << "keep looking.\n");
            continue;
          }
        }

        // If the BaseReg has been modified, then we cannot do the
        // optimization. For example, in the following pattern
        //   ldr x1 [x2]
        //   ldr x2 [x3]
        //   ldr x4 [x2, #8]
        // the first and third ldr cannot be converted to ldp x1, x4, [x2].
        if (!ModifiedRegUnits.available(BaseReg))
          return E;

        const bool SameLoadReg =
            MayLoad &&
            TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg());

        // If the Rt of the second instruction was not modified or used between
        // the two instructions and none of the instructions between the second
        // and first alias with the second, we can combine the second into the
        // first.
        bool RtNotModified =
            ModifiedRegUnits.available(getLdStRegOp(MI).getReg());
        bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg &&
                           !UsedRegUnits.available(getLdStRegOp(MI).getReg()));

        LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n"
                          << "Reg '" << getLdStRegOp(MI) << "' not modified: "
                          << (RtNotModified ? "true" : "false") << "\n"
                          << "Reg '" << getLdStRegOp(MI) << "' not used: "
                          << (RtNotUsed ? "true" : "false") << "\n");

        if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) {
          // For pairs loading into the same register, try to find a renaming
          // opportunity to allow the merge.
          if (SameLoadReg) {
            std::optional<MCPhysReg> RenameReg =
                findRenameRegForSameLdStRegPair(MaybeCanRename, FirstMI, MI,
                                                Reg, DefinedInBB,
                                                UsedInBetween, RequiredClasses,
                                                TRI);
            if (!RenameReg) {
              LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                                UsedRegUnits, TRI);
              MemInsns.push_back(&MI);
              LLVM_DEBUG(dbgs() << "Can't find reg for renaming, "
                                << "keep looking.\n");
              continue;
            }
            Flags.setRenameReg(*RenameReg);
          }

          Flags.setMergeForward(false);
          if (!SameLoadReg)
            Flags.clearRenameReg();
          return MBBI;
        }

        // Likewise, if the Rt of the first instruction is not modified or used
        // between the two instructions and none of the instructions between
        // the first and the second alias with the first, we can combine the
        // first into the second.
        RtNotModified =
            ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg());

        LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n"
                          << "Reg '" << getLdStRegOp(FirstMI)
                          << "' not modified: "
                          << (RtNotModified ? "true" : "false") << "\n");

        if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) {
          if (UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
            Flags.setMergeForward(true);
            Flags.clearRenameReg();
            return MBBI;
          }

          std::optional<MCPhysReg> RenameReg = findRenameRegForSameLdStRegPair(
              MaybeCanRename, FirstMI, MI, Reg, DefinedInBB, UsedInBetween,
              RequiredClasses, TRI);
          if (RenameReg) {
            Flags.setMergeForward(true);
            Flags.setRenameReg(*RenameReg);
            MBBIWithRenameReg = MBBI;
          }
        }
        LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to "
                          << "interference in between, keep looking.\n");
      }
    }

    if (Flags.getRenameReg())
      return MBBIWithRenameReg;

    // If the instruction wasn't a matching load or store, stop searching if
    // we encounter a call instruction that might modify memory.
    if (MI.isCall())
      return E;

    // Update modified / used register units.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (!ModifiedRegUnits.available(BaseReg))
      return E;

    // Update the list of instructions that read/write memory.
    if (MI.mayLoadOrStore())
      MemInsns.push_back(&MI);
  }
  return E;
}
// If the CFI instruction right after a base register update describes the CFA
// offset, return it so it can be kept with the update when the update is
// folded into a load/store; otherwise return the block's end iterator.
static MachineBasicBlock::iterator
maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {
  assert((MI.getOpcode() == AArch64::SUBXri ||
          MI.getOpcode() == AArch64::ADDXri) &&
         "Expected a register update instruction");
  auto End = MI.getParent()->end();
  if (MaybeCFI == End ||
      MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
      !(MI.getFlag(MachineInstr::FrameSetup) ||
        MI.getFlag(MachineInstr::FrameDestroy)) ||
      MI.getOperand(0).getReg() != AArch64::SP)
    return End;

  const MachineFunction &MF = *MI.getParent()->getParent();
  unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
  switch (MF.getFrameInstructions()[CFIIndex].getOperation()) {
  case MCCFIInstruction::OpDefCfa:
  case MCCFIInstruction::OpDefCfaOffset:
    return MaybeCFI;
  default:
    return End;
  }
}
std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(
    MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update,
    bool IsForward, bool IsPreIdx, bool MergeEither) {
  assert((Update->getOpcode() == AArch64::ADDXri ||
          Update->getOpcode() == AArch64::SUBXri) &&
         "Unexpected base register update instruction to merge!");
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);

  // If updating the SP and the following instruction is CFA offset related
  // CFI, make sure the CFI follows the SP update, either by merging at the
  // location of the update or by moving the CFI after the merged instruction.
  MachineBasicBlock::iterator InsertPt = I;
  if (IsForward) {
    assert(IsPreIdx);
    if (auto CFI = maybeMoveCFI(*Update, next_nodbg(Update, E)); CFI != E) {
      if (MergeEither) {
        InsertPt = Update;
      } else {
        // Take care not to reorder CFIs.
        if (std::any_of(std::next(CFI), I, [](const auto &Insn) {
              return Insn.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
            }))
          return std::nullopt;

        MachineBasicBlock *MBB = InsertPt->getParent();
        MBB->splice(std::next(InsertPt), MBB, CFI);
      }
    }
  }

  // Return the instruction following the merged instruction, which is
  // the instruction following our unmerged load. Unless that's the add/sub
  // instruction we're merging, in which case it's the one after that.
  if (NextI == Update)
    NextI = next_nodbg(NextI, E);

  int Value = Update->getOperand(2).getImm();
  assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
         "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
  if (Update->getOpcode() == AArch64::SUBXri)
    Value = -Value;

  unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
                             : getPostIndexedOpcode(I->getOpcode());
  MachineInstrBuilder MIB;
  int Scale, MinOffset, MaxOffset;
  getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
  if (!AArch64InstrInfo::isPairedLdSt(*I)) {
    // Non-paired instruction.
    MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
                  TII->get(NewOpc))
              .add(Update->getOperand(0))
              .add(getLdStRegOp(*I))
              .add(AArch64InstrInfo::getLdStBaseOp(*I))
              .addImm(Value / Scale)
              .setMemRefs(I->memoperands())
              .setMIFlags(I->mergeFlagsWith(*Update));
  } else {
    // Paired instruction.
    MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
                  TII->get(NewOpc))
              .add(Update->getOperand(0))
              .add(getLdStRegOp(*I, 0))
              .add(getLdStRegOp(*I, 1))
              .add(AArch64InstrInfo::getLdStBaseOp(*I))
              .addImm(Value / Scale)
              .setMemRefs(I->memoperands())
              .setMIFlags(I->mergeFlagsWith(*Update));
  }
  (void)MIB;

  if (IsPreIdx) {
    ++NumPreFolded;
    LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store.");
  } else {
    ++NumPostFolded;
    LLVM_DEBUG(dbgs() << "Creating post-indexed load/store.");
  }

  // Erase the old instructions for the block.
  I->eraseFromParent();
  Update->eraseFromParent();

  return NextI;
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeConstOffsetInsn(MachineBasicBlock::iterator I,
                                          MachineBasicBlock::iterator Update,
                                          unsigned Offset, int Scale) {
  assert((Update->getOpcode() == AArch64::MOVKWi) &&
         "Unexpected const mov instruction to merge!");
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  MachineBasicBlock::iterator PrevI = prev_nodbg(Update, E);
  MachineInstr &MemMI = *I;
  unsigned Mask = (1 << 12) * Scale - 1;
  unsigned Low = Offset & Mask;
  unsigned High = Offset - Low;
  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
  Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();
  MachineInstrBuilder AddMIB, MemMIB;

  // ADD IndexReg, BaseReg, High (the BaseReg may be SP).
  AddMIB =
      BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri))
          .addDef(IndexReg)
          .addUse(BaseReg)
          .addImm(High >> 12) // shifted value
          .addImm(12);        // shift 12
  (void)AddMIB;
  // Ld/St DestReg, IndexReg, Imm12.
  unsigned NewOpc = getBaseAddressOpcode(I->getOpcode());
  MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
               .add(getLdStRegOp(MemMI))
               .add(AArch64InstrInfo::getLdStOffsetOp(MemMI))
               .addImm(Low / Scale)
               .setMemRefs(I->memoperands())
               .setMIFlags(I->mergeFlagsWith(*Update));
  (void)MemMIB;

  ++NumConstOffsetFolded;
  LLVM_DEBUG(dbgs() << "Creating base address load/store.\n");

  // Erase the old instructions for the block.
  I->eraseFromParent();
  PrevI->eraseFromParent();
  Update->eraseFromParent();

  return NextI;
}
bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
                                               MachineInstr &MI,
                                               unsigned BaseReg, int Offset) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBXri:
  case AArch64::ADDXri:
    // Make sure it's a vanilla immediate operand, not a relocation or
    // anything else we can't handle.
    if (!MI.getOperand(2).isImm())
      break;
    // Watch out for 1 << 12 shifted value.
    if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
      break;

    // The update instruction source and destination register must be the
    // same as the load/store base register.
    if (MI.getOperand(0).getReg() != BaseReg ||
        MI.getOperand(1).getReg() != BaseReg)
      break;

    int UpdateOffset = MI.getOperand(2).getImm();
    if (MI.getOpcode() == AArch64::SUBXri)
      UpdateOffset = -UpdateOffset;

    // The immediate must be a multiple of the scaling factor of the pre/post
    // indexed instruction.
    int Scale, MinOffset, MaxOffset;
    getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
    if (UpdateOffset % Scale != 0)
      break;

    // The scaled offset must fit in the instruction immediate.
    int ScaledOffset = UpdateOffset / Scale;
    if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
      break;

    // If we have a non-zero Offset, check that it matches the amount we're
    // adding to the register.
    if (!Offset || Offset == UpdateOffset)
      return true;
    break;
  }
  return false;
}
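// Illustrative match (not from the original source): for "ldr x0, [x1]"
// followed later by "add x1, x1, #8", BaseReg is x1 and UpdateOffset is 8,
// which is within range, so the pair can be rewritten as the post-indexed
// "ldr x0, [x1], #8".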
bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
                                                 MachineInstr &MI,
                                                 unsigned IndexReg,
                                                 unsigned &Offset) {
  // The MOVK source and destination register must be the same as the
  // load/store index register.
  if (MI.getOpcode() == AArch64::MOVKWi &&
      TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) {

    // A movz + movk pair holds a large offset of a Ld/St instruction.
    MachineBasicBlock::iterator B = MI.getParent()->begin();
    MachineBasicBlock::iterator MBBI = &MI;
    // Skip the case where the MOVK is the first instruction of the block.
    if (MBBI == B)
      return false;
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MovzMI = *MBBI;
    // Make sure the MOVKWi and MOVZWi set the same register.
    if (MovzMI.getOpcode() == AArch64::MOVZWi &&
        TRI->isSuperOrSubRegisterEq(IndexReg, MovzMI.getOperand(0).getReg())) {
      unsigned Low = MovzMI.getOperand(1).getImm()
                     << MovzMI.getOperand(2).getImm();
      unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm();
      Offset = High + Low;
      // 12-bit optionally shifted immediates are legal for adds.
      return Offset >> 24 == 0;
    }
  }
  return false;
}
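// Illustrative encoding (not from the original source):
//   movz w8, #0x0123            ; Low  = 0x0123
//   movk w8, #0x4, lsl #16      ; High = 0x4 << 16 = 0x40000
// gives Offset = 0x40123, accepted because Offset >> 24 == 0.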
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
    MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr &MemMI = *I;
  MachineBasicBlock::iterator MBBI = I;

  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
  int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() *
                         TII->getMemScale(MemMI);

  // Scan forward looking for post-index opportunities. Updating instructions
  // can't be formed if the memory instruction doesn't have the offset we're
  // looking for.
  if (MIUnscaledOffset != UnscaledOffset)
    return E;

  // If the base register overlaps a source/destination register, we can't
  // merge the update. This does not apply to tag store instructions, which
  // ignore the address part of the source register.
  if (!isTagStore(MemMI)) {
    bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
    for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
      Register DestReg = getLdStRegOp(MemMI, i).getReg();
      if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
        return E;
    }
  }

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  MBBI = next_nodbg(MBBI, E);

  // We can't post-increment the stack pointer if any instruction between
  // the memory access (I) and the increment (MBBI) can access the memory
  // region defined by [SP, MBBI].
  const bool BaseRegSP = BaseReg == AArch64::SP;
  if (BaseRegSP && needsWinCFI(I->getMF())) {
    // FIXME: For now, we always block the optimization over SP in windows
    // targets as it requires to adjust the unwind/debug info, messing up
    // the unwind info can actually cause a miscompile.
    return E;
  }

  // Visiting successor blocks requires accurate liveness information.
  const bool VisitSucc = I->getMF()->getRegInfo().tracksLiveness();
  MachineBasicBlock *CurMBB = I->getParent();
  unsigned Count = 0;
  while (true) {
    for (; MBBI != E && Count < Limit; MBBI = next_nodbg(MBBI, E)) {
      MachineInstr &MI = *MBBI;

      // Don't count transient instructions towards the search limit since
      // there may be different numbers of them if e.g. debug information is
      // present.
      if (!MI.isTransient())
        ++Count;

      // If we found a match, return it.
      if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
        return MBBI;

      // Update the status of what the instruction clobbered and used.
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);

      // Otherwise, if the base register is used or modified, we have no
      // match, so return early. If we are optimizing SP, do not allow
      // instructions that may load or store in between.
      if (!ModifiedRegUnits.available(BaseReg) ||
          !UsedRegUnits.available(BaseReg) ||
          (BaseRegSP && MBBI->mayLoadOrStore()))
        return E;
    }

    if (!VisitSucc || Limit <= Count)
      break;

    // Try to continue the search in a unique successor that keeps BaseReg
    // live-in and has a single predecessor; bail out if several successors
    // keep BaseReg live.
    MachineBasicBlock *SuccToVisit = nullptr;
    unsigned LiveSuccCount = 0;
    for (MachineBasicBlock *Succ : CurMBB->successors()) {
      for (MCRegAliasIterator AI(BaseReg, TRI, true); AI.isValid(); ++AI) {
        if (Succ->isLiveIn(*AI)) {
          if (LiveSuccCount++)
            return E;
          if (Succ->pred_size() == 1)
            SuccToVisit = Succ;
          break;
        }
      }
    }
    if (!SuccToVisit)
      break;
    CurMBB = SuccToVisit;
    E = CurMBB->end();
    MBBI = CurMBB->begin();
  }

  return E;
}
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
    MachineBasicBlock::iterator I, unsigned Limit, bool &MergeEither) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr &MemMI = *I;
  MachineBasicBlock::iterator MBBI = I;
  MachineFunction &MF = *MemMI.getMF();

  Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
  int Offset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm();

  bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
  Register DestReg[] = {getLdStRegOp(MemMI, 0).getReg(),
                        IsPairedInsn ? getLdStRegOp(MemMI, 1).getReg()
                                     : AArch64::NoRegister};

  // If the load/store is the first instruction in the block, there's
  // obviously not any matching update.
  if (MBBI == B)
    return E;
  // If the base register overlaps a destination register, we can't merge the
  // update.
  if (!isTagStore(MemMI)) {
    for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i)
      if (DestReg[i] == BaseReg || TRI->isSubRegister(BaseReg, DestReg[i]))
        return E;
  }

  const bool BaseRegSP = BaseReg == AArch64::SP;
  if (BaseRegSP && needsWinCFI(I->getMF())) {
    // FIXME: For now, we always block the optimization over SP in windows
    // targets as it requires to adjust the unwind/debug info, messing up
    // the unwind info can actually cause a miscompile.
    return E;
  }

  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  unsigned RedZoneSize =
      Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  unsigned Count = 0;
  bool MemAccessBeforeSPPreInc = false;
  MergeEither = true;
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // If we found a match, return it.
    if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
      // Check that the update value is within our red zone limit (which may
      // be zero).
      if (MemAccessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)
        return E;
      return MBBI;
    }

    // Update the status of what the instruction clobbered and used.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is used or modified, we have no match,
    // so return early.
    if (!ModifiedRegUnits.available(BaseReg) ||
        !UsedRegUnits.available(BaseReg))
      return E;

    // If we have a destination register (i.e. a load instruction) and a
    // destination register is used or modified, then we can only merge
    // forward, i.e. the combined instruction is put in the place of the
    // memory instruction. The same applies for memory accesses and
    // unmodeled side effects.
    if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() ||
        (DestReg[0] != AArch64::NoRegister &&
         !(ModifiedRegUnits.available(DestReg[0]) &&
           UsedRegUnits.available(DestReg[0]))) ||
        (DestReg[1] != AArch64::NoRegister &&
         !(ModifiedRegUnits.available(DestReg[1]) &&
           UsedRegUnits.available(DestReg[1]))))
      MergeEither = false;

    // Keep track of whether we have a memory access before an SP
    // pre-increment; in that case we need to validate later that the update
    // amount respects the red zone.
    if (BaseRegSP && MBBI->mayLoadOrStore())
      MemAccessBeforeSPPreInc = true;
  } while (MBBI != B && Count < Limit);
  return E;
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
    MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr &MemMI = *I;
  MachineBasicBlock::iterator MBBI = I;

  // If the load is the first instruction in the block, there's obviously
  // not any matching update.
  if (MBBI == B)
    return E;

  // Make sure the IndexReg is killed and the shift amount is zero.
  // TODO: Relax this restriction to extend, simplify processing now.
  if (!AArch64InstrInfo::getLdStOffsetOp(MemMI).isKill() ||
      !AArch64InstrInfo::getLdStAmountOp(MemMI).isImm() ||
      (AArch64InstrInfo::getLdStAmountOp(MemMI).getImm() != 0))
    return E;

  Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  unsigned Count = 0;
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // If we found a match, return it.
    if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) {
      return MBBI;
    }

    // Update the status of what the instruction clobbered and used.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the index register is used or modified, we have no
    // match, so return early.
    if (!ModifiedRegUnits.available(IndexReg) ||
        !UsedRegUnits.available(IndexReg))
      return E;

  } while (MBBI != B && Count < Limit);
  return E;
}
bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
    MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  // If this is a volatile load, don't mess with it.
  if (MI.hasOrderedMemoryRef())
    return false;

  // Make sure this is a reg+imm.
  // FIXME: It is possible to extend it to handle reg+reg cases.
  if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
    return false;

  // Look backward up to LdStLimit instructions.
  MachineBasicBlock::iterator StoreI;
  if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
    ++NumLoadsFromStoresPromoted;
    // Promote the load. Keeping the iterator straight is a pain, so we let
    // the merge routine tell us what the next instruction is after it's done
    // mucking about.
    MBBI = promoteLoadFromStore(MBBI, StoreI);
    return true;
  }
  return false;
}
// Merge adjacent zero stores into a wider store.
bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
    MachineBasicBlock::iterator &MBBI) {
  assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();

  if (!TII->isCandidateToMergeOrPair(MI))
    return false;

  // Look ahead up to LdStLimit instructions for a mergeable instruction.
  LdStPairFlags Flags;
  MachineBasicBlock::iterator MergeMI =
      findMatchingInsn(MBBI, Flags, LdStLimit, /*FindNarrowMerge=*/true);
  if (MergeMI != E) {
    ++NumZeroStoresPromoted;

    // Keeping the iterator straight is a pain, so we let the merge routine
    // tell us what the next instruction is after it's done mucking about.
    MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
    return true;
  }
  return false;
}
// Find loads and stores that can be merged into a single load or store pair
// instruction.
bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();

  if (!TII->isCandidateToMergeOrPair(MI))
    return false;

  // If the disable-ldp feature is opted, do not emit ldp.
  if (MI.mayLoad() && Subtarget->hasDisableLdp())
    return false;

  // If the disable-stp feature is opted, do not emit stp.
  if (MI.mayStore() && Subtarget->hasDisableStp())
    return false;

  // Early exit if the offset cannot possibly match.
  bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
  int Offset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
  int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
  if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
    return false;

  // Look ahead up to LdStLimit instructions for a pairable instruction.
  LdStPairFlags Flags;
  MachineBasicBlock::iterator Paired =
      findMatchingInsn(MBBI, Flags, LdStLimit, /*FindNarrowMerge=*/false);
  if (Paired != E) {
    // Keeping the iterator straight is a pain, so we let the merge routine
    // tell us what the next instruction is after it's done mucking about.
    auto Prev = std::prev(MBBI);

    // Fetch the memoperand of the load/store that is a candidate for
    // combination.
    MachineMemOperand *MemOp =
        MI.memoperands_empty() ? nullptr : MI.memoperands().front();

    // If the ldp/stp-aligned-only feature is opted, check that the alignment
    // of the source pointer is at least double the alignment of the type.
    if ((MI.mayLoad() && Subtarget->hasLdpAlignedOnly()) ||
        (MI.mayStore() && Subtarget->hasStpAlignedOnly())) {
      // If there is no size/align information, cancel the transformation.
      if (!MemOp || !MemOp->getMemoryType().isValid()) {
        NumFailedAlignmentCheck++;
        return false;
      }

      uint64_t MemAlignment = MemOp->getAlign().value();
      uint64_t TypeAlignment =
          Align(MemOp->getSize().getValue().getKnownMinValue()).value();

      if (MemAlignment < 2 * TypeAlignment) {
        NumFailedAlignmentCheck++;
        return false;
      }
    }

    ++NumPairCreated;
    if (TII->hasUnscaledLdStOffset(MI))
      ++NumUnscaledPairCreated;

    MBBI = mergePairedInsns(MBBI, Paired, Flags);
    // Collect liveness info for instructions between Prev and the new
    // position MBBI.
    for (auto I = std::next(Prev); I != MBBI; I++)
      updateDefinedRegisters(*I, DefinedInBB, TRI);

    return true;
  }
  return false;
}
bool AArch64LoadStoreOpt::tryToMergeLdStUpdate(
    MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();
  MachineBasicBlock::iterator Update;

  // Look forward to try to form a post-index instruction. For example,
  //   ldr x0, [x20]
  //   add x20, x20, #32
  // merged into:
  //   ldr x0, [x20], #32
  Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
  if (Update != E) {
    // Merge the update into the ld/st.
    if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
                                     /*IsPreIdx=*/false,
                                     /*MergeEither=*/false)) {
      MBBI = *NextI;
      return true;
    }
  }

  // Don't know how to handle unscaled pre/post-index versions below, so bail.
  if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
    return false;

  // Look back to try to find a pre-index instruction. For example,
  //   add x0, x0, #8
  //   ldr x1, [x0]
  // merged into:
  //   ldr x1, [x0, #8]!
  bool MergeEither;
  Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit, MergeEither);
  if (Update != E) {
    // Merge the update into the ld/st.
    if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/true,
                                     /*IsPreIdx=*/true, MergeEither)) {
      MBBI = *NextI;
      return true;
    }
  }

  // The immediate in the load/store is scaled by the size of the memory
  // operation. The immediate in the add we're looking for, however, is not,
  // so adjust here.
  int UnscaledOffset =
      AArch64InstrInfo::getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);

  // Look forward to try to find a pre-index instruction. For example,
  //   ldr x1, [x0, #64]
  //   add x0, x0, #64
  // merged into:
  //   ldr x1, [x0, #64]!
  Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
  if (Update != E) {
    // Merge the update into the ld/st.
    if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
                                     /*IsPreIdx=*/true,
                                     /*MergeEither=*/false)) {
      MBBI = *NextI;
      return true;
    }
  }

  return false;
}
bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI,
                                              int Scale) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();
  MachineBasicBlock::iterator Update;

  // Don't know how to handle unscaled pre/post-index versions below, so bail.
  if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
    return false;

  // Look back to try to find a constant offset for an index load/store. For
  // example,
  //   mov  x8, #LargeImm        ; = a * (1 << 12) + imm12
  //   ldr  x1, [x0, x8]
  // merged into:
  //   add  x8, x0, a * (1 << 12)
  //   ldr  x1, [x8, imm12]
  unsigned Offset;
  Update = findMatchingConstOffsetBackward(MBBI, LdStConstLimit, Offset);
  if (Update != E && (Offset & (Scale - 1)) == 0) {
    // Merge the imm12 into the ld/st.
    MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale);
    return true;
  }

  return false;
}
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
                                        bool EnableNarrowZeroStOpt) {
  bool Modified = false;

  // 1) Find loads that read directly from stores and promote them by
  //    replacing them with mov instructions (or, when the store is wider,
  //    a bitfield extract).
  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
      Modified = true;
    else
      ++MBBI;
  }

  // 2) Merge adjacent zero stores into a wider store.
  if (EnableNarrowZeroStOpt)
    for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
         MBBI != E;) {
      if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
        Modified = true;
      else
        ++MBBI;
    }

  // 3) Find loads and stores that can be merged into a single load or store
  //    pair instruction.
  if (MBB.getParent()->getRegInfo().tracksLiveness()) {
    DefinedInBB.clear();
    DefinedInBB.addLiveIns(MBB);
  }
  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    // Track currently live registers up to this point, to help with
    // searching for a rename register on demand.
    updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
    if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
      Modified = true;
    else
      ++MBBI;
  }

  // 4) Fold base register updates into pre-/post-indexed forms, and fold
  //    constant index materializations into register-offset loads/stores.
  // ...

  return Modified;
}
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(Fn.getFunction()))
    return false;

  Subtarget = &Fn.getSubtarget<AArch64Subtarget>();
  TII = Subtarget->getInstrInfo();
  TRI = Subtarget->getRegisterInfo();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  // Initialize the register unit trackers once per function.
  ModifiedRegUnits.init(*TRI);
  UsedRegUnits.init(*TRI);
  DefinedInBB.init(*TRI);

  bool Modified = false;
  bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
  for (auto &MBB : Fn)
    Modified |= optimizeBlock(MBB, enableNarrowZeroStOpt);

  return Modified;
}

FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
  return new AArch64LoadStoreOpt();
}