#define DEBUG_TYPE "aarch64-ldst-opt"

STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
          "Number of load/store from unscaled generated");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");

DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
              "Controls which pairs are considered for renaming");
#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"

using LdStPairFlags = struct LdStPairFlags {
  // If a matching instruction is found, MergeForward is set to true if the
  // merge is to remove the first instruction and replace the second with a
  // pair-wise insn, and false if the reverse is true.
  bool MergeForward = false;

  // SExtIdx gives the index of the result of the load pair that must be
  // extended. -1 means none of the results needs to be extended.
  int SExtIdx = -1;

  // If not None, RenameReg can be used to rename the result register of the
  // first store in a pair.
  Optional<MCPhysReg> RenameReg = None;

  LdStPairFlags() = default;

  void setMergeForward(bool V = true) { MergeForward = V; }
  bool getMergeForward() const { return MergeForward; }

  void setSExtIdx(int V) { SExtIdx = V; }
  int getSExtIdx() const { return SExtIdx; }

  void setRenameReg(MCPhysReg R) { RenameReg = R; }
  void clearRenameReg() { RenameReg = None; }
  Optional<MCPhysReg> getRenameReg() const { return RenameReg; }
};
  // Scan the instructions looking for a load/store that can be combined with
  // the current instruction into a load/store pair; return the matching
  // instruction if one is found, else MBB->end().
  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
                                               LdStPairFlags &Flags,
                                               unsigned Limit,
                                               bool FindNarrowMerge);
  MachineBasicBlock::iterator
  mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                        MachineBasicBlock::iterator MergeMI,
                        const LdStPairFlags &Flags);
  MachineBasicBlock::iterator
  mergePairedInsns(MachineBasicBlock::iterator I,
                   MachineBasicBlock::iterator Paired,
                   const LdStPairFlags &Flags);
  MachineBasicBlock::iterator
  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
                                int UnscaledOffset, unsigned Limit);
  bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
                            unsigned BaseReg, int Offset);
static bool isNarrowStore(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
    return true;
  }
}

// These instructions write a memory tag (and possibly zero the memory),
// ignoring the address part of the source register.
static bool isTagStore(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::STGOffset:
  case AArch64::STZGOffset:
  case AArch64::ST2GOffset:
  case AArch64::STZ2GOffset:
    return true;
  }
}

static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                         bool *IsValidLdStrOpc = nullptr) {
  if (IsValidLdStrOpc)
    *IsValidLdStrOpc = true;
  switch (Opc) {
  default:
    if (IsValidLdStrOpc)
      *IsValidLdStrOpc = false;
    return std::numeric_limits<unsigned>::max();
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRDpre:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRQpre:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
  case AArch64::STRWui:
  case AArch64::STRWpre:
  case AArch64::STURWi:
  case AArch64::STRXui:
  case AArch64::STRXpre:
  case AArch64::STURXi:
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRDpre:
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDRQpre:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::LDRWpre:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::LDRXpre:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::STRSpre:
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSpre:
    return Opc;
  case AArch64::LDRSWui:
    return AArch64::LDRWui;
  case AArch64::LDURSWi:
    return AArch64::LDURWi;
  }
}
static unsigned getMatchingWideOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no wide equivalent!");
  case AArch64::STRBBui:
    return AArch64::STRHHui;
  case AArch64::STRHHui:
    return AArch64::STRWui;
  case AArch64::STURBBi:
    return AArch64::STURHHi;
  case AArch64::STURHHi:
    return AArch64::STURWi;
  case AArch64::STURWi:
    return AArch64::STURXi;
  case AArch64::STRWui:
    return AArch64::STRXui;
  }
}
static unsigned getMatchingPairOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pairwise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STPSi;
  case AArch64::STRSpre:
    return AArch64::STPSpre;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STPDi;
  case AArch64::STRDpre:
    return AArch64::STPDpre;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STPQi;
  case AArch64::STRQpre:
    return AArch64::STPQpre;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STPWi;
  case AArch64::STRWpre:
    return AArch64::STPWpre;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STPXi;
  case AArch64::STRXpre:
    return AArch64::STPXpre;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDPSi;
  case AArch64::LDRSpre:
    return AArch64::LDPSpre;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDPDi;
  case AArch64::LDRDpre:
    return AArch64::LDPDpre;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDPQi;
  case AArch64::LDRQpre:
    return AArch64::LDPQpre;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDPWi;
  case AArch64::LDRWpre:
    return AArch64::LDPWpre;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDPXi;
  case AArch64::LDRXpre:
    return AArch64::LDPXpre;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return AArch64::LDPSWi;
  }
}
static unsigned isMatchingStore(MachineInstr &LoadInst,
                                MachineInstr &StoreInst) {
  unsigned LdOpc = LoadInst.getOpcode();
  unsigned StOpc = StoreInst.getOpcode();
  switch (LdOpc) {
  default:
    llvm_unreachable("Unsupported load instruction!");
  case AArch64::LDRBBui:
    return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
           StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURBBi:
    return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
           StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRHHui:
    return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
           StOpc == AArch64::STRXui;
  case AArch64::LDURHHi:
    return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
           StOpc == AArch64::STURXi;
  case AArch64::LDRWui:
    return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURWi:
    return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRXui:
    return StOpc == AArch64::STRXui;
  case AArch64::LDURXi:
    return StOpc == AArch64::STURXi;
  }
}
static unsigned getPreIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pre-indexed equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpre;
  case AArch64::STRDui:
    return AArch64::STRDpre;
  case AArch64::STRQui:
    return AArch64::STRQpre;
  case AArch64::STRBBui:
    return AArch64::STRBBpre;
  case AArch64::STRHHui:
    return AArch64::STRHHpre;
  case AArch64::STRWui:
    return AArch64::STRWpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  case AArch64::LDRSui:
    return AArch64::LDRSpre;
  case AArch64::LDRDui:
    return AArch64::LDRDpre;
  case AArch64::LDRQui:
    return AArch64::LDRQpre;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpre;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpre;
  case AArch64::LDRWui:
    return AArch64::LDRWpre;
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpre;
  case AArch64::LDPSi:
    return AArch64::LDPSpre;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpre;
  case AArch64::LDPDi:
    return AArch64::LDPDpre;
  case AArch64::LDPQi:
    return AArch64::LDPQpre;
  case AArch64::LDPWi:
    return AArch64::LDPWpre;
  case AArch64::LDPXi:
    return AArch64::LDPXpre;
  case AArch64::STPSi:
    return AArch64::STPSpre;
  case AArch64::STPDi:
    return AArch64::STPDpre;
  case AArch64::STPQi:
    return AArch64::STPQpre;
  case AArch64::STPWi:
    return AArch64::STPWpre;
  case AArch64::STPXi:
    return AArch64::STPXpre;
  case AArch64::STGOffset:
    return AArch64::STGPreIndex;
  case AArch64::STZGOffset:
    return AArch64::STZGPreIndex;
  case AArch64::ST2GOffset:
    return AArch64::ST2GPreIndex;
  case AArch64::STZ2GOffset:
    return AArch64::STZ2GPreIndex;
  case AArch64::STGPi:
    return AArch64::STGPpre;
  }
}
static unsigned getPostIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no post-indexed equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STRSpost;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STRDpost;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STRQpost;
  case AArch64::STRBBui:
    return AArch64::STRBBpost;
  case AArch64::STRHHui:
    return AArch64::STRHHpost;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STRWpost;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STRXpost;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDRSpost;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDRDpost;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDRQpost;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpost;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpost;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDRWpost;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDRXpost;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpost;
  case AArch64::LDPSi:
    return AArch64::LDPSpost;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpost;
  case AArch64::LDPDi:
    return AArch64::LDPDpost;
  case AArch64::LDPQi:
    return AArch64::LDPQpost;
  case AArch64::LDPWi:
    return AArch64::LDPWpost;
  case AArch64::LDPXi:
    return AArch64::LDPXpost;
  case AArch64::STPSi:
    return AArch64::STPSpost;
  case AArch64::STPDi:
    return AArch64::STPDpost;
  case AArch64::STPQi:
    return AArch64::STPQpost;
  case AArch64::STPWi:
    return AArch64::STPWpost;
  case AArch64::STPXi:
    return AArch64::STPXpost;
  case AArch64::STGOffset:
    return AArch64::STGPostIndex;
  case AArch64::STZGOffset:
    return AArch64::STZGPostIndex;
  case AArch64::ST2GOffset:
    return AArch64::ST2GPostIndex;
  case AArch64::STZ2GOffset:
    return AArch64::STZ2GPostIndex;
  case AArch64::STGPi:
    return AArch64::STGPpost;
  }
}
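In instruction terms (an editor's sketch under the same assumptions as above), the pre-/post-indexed mappings let a separate base-register update fold into the memory access itself:

// Editor's sketch, not part of the source.
//
//   ldr x1, [x0]    followed by  add x0, x0, #8  ==>  ldr x1, [x0], #8    (post-index)
//   add x0, x0, #16 followed by  ldr x1, [x0]    ==>  ldr x1, [x0, #16]!  (pre-index)
//
unsigned PostOpc = getPostIndexedOpcode(AArch64::LDRXui); // == AArch64::LDRXpost
unsigned PreOpc  = getPreIndexedOpcode(AArch64::LDRXui);  // == AArch64::LDRXpre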
static bool isPairedLdSt(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:
  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
  case AArch64::STPWi:
  case AArch64::STPXi:
  case AArch64::STGPi:
    return true;
  }
}

static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {
  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();
  switch (OpcA) {
  default:
    return false;
  case AArch64::STRSpre:
    return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
  case AArch64::STRDpre:
    return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
  case AArch64::STRQpre:
    return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
  case AArch64::STRWpre:
    return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
  case AArch64::STRXpre:
    return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
  case AArch64::LDRSpre:
    return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
  case AArch64::LDRDpre:
    return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
  case AArch64::LDRQpre:
    return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
  case AArch64::LDRWpre:
    return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
  case AArch64::LDRXpre:
    return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
  }
}
// Returns the scale and the legal offset range of the pre/post-indexed
// variants of MI.
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
                                       int &MinOffset, int &MaxOffset) {
  // ... (per-opcode lookup of Scale, MinOffset and MaxOffset elided) ...
}

static MachineOperand &getLdStRegOp(MachineInstr &MI,
                                    unsigned PairedRegOp = 0) {
  assert(PairedRegOp < 2 && "Unexpected register operand idx.");
  unsigned Idx; // 0, PairedRegOp, or PairedRegOp + 1 by addressing form.
  // ... (index selection for paired / pre-indexed forms elided) ...
  return MI.getOperand(Idx);
}

static const MachineOperand &getLdStBaseOp(const MachineInstr &MI) {
  unsigned Idx = AArch64InstrInfo::isPreLdSt(MI) || isPairedLdSt(MI) ? 2 : 1;
  return MI.getOperand(Idx);
}

static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI) {
  unsigned Idx = AArch64InstrInfo::isPreLdSt(MI) || isPairedLdSt(MI) ? 3 : 2;
  return MI.getOperand(Idx);
}

static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
                                  MachineInstr &StoreInst,
                                  const AArch64InstrInfo *TII) {
  int LoadSize = TII->getMemScale(LoadInst);
  int StoreSize = TII->getMemScale(StoreInst);
  // Convert both offsets to byte (unscaled) form before comparing.
  int UnscaledStOffset = TII->hasUnscaledLdStOffset(StoreInst)
                             ? getLdStOffsetOp(StoreInst).getImm()
                             : getLdStOffsetOp(StoreInst).getImm() * StoreSize;
  int UnscaledLdOffset = TII->hasUnscaledLdStOffset(LoadInst)
                             ? getLdStOffsetOp(LoadInst).getImm()
                             : getLdStOffsetOp(LoadInst).getImm() * LoadSize;
  return (UnscaledStOffset <= UnscaledLdOffset) &&
         (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
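Worked example of the containment test (an editor's illustration; the operand values are invented):

// STRXui  x0, sp, #1  -> StoreSize = 8, UnscaledStOffset = 1 * 8 = 8,  bytes [8, 16)
// LDRHHui w1, sp, #6  -> LoadSize  = 2, UnscaledLdOffset = 6 * 2 = 12, bytes [12, 14)
// (8 <= 12) && (12 + 2 <= 8 + 8) holds, so every byte the load reads was
// written by the store, and the load can be satisfied from the stored value.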
static bool isPromotableZeroStoreInst(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
          isNarrowStore(Opc)) &&
         getLdStRegOp(MI).getReg() == AArch64::WZR;
}

static bool isPromotableLoadFromStore(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  // Unscaled instructions.
  case AArch64::LDURBBi:
  case AArch64::LDURHHi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
    return true;
  }
}
static bool isMergeableLdStUpdate(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::STRHHui:
  case AArch64::STRBBui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
  case AArch64::LDRHHui:
  case AArch64::LDRBBui:
  case AArch64::STGOffset:
  case AArch64::STZGOffset:
  case AArch64::ST2GOffset:
  case AArch64::STZ2GOffset:
  // Unscaled instructions.
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  // Paired instructions.
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  // ... (remaining paired opcodes elided) ...
    // Make sure this is a reg+imm (as opposed to an address reloc).
    if (!getLdStOffsetOp(MI).isImm())
      return false;
    return true;
  }
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                                           MachineBasicBlock::iterator MergeMI,
                                           const LdStPairFlags &Flags) {
  assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
         "Expected promotable zero stores.");

  MachineBasicBlock::iterator NextI = next_nodbg(I, I->getParent()->end());
  // If NextI is the second of the two instructions to be merged, we need to
  // skip one further.
  if (NextI == MergeMI)
    NextI = next_nodbg(NextI, NextI->getParent()->end());

  unsigned Opc = I->getOpcode();
  bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
  int OffsetStride = IsScaled ? 1 : TII->getMemScale(*I);

  bool MergeForward = Flags.getMergeForward();
  // ... (choose InsertionPoint/BaseRegOp and compute the merged offset
  // OffsetImm; elided) ...
  assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");

  // Construct the new instruction.
  MachineInstrBuilder MIB;
  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
            .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
            .add(BaseRegOp)
            .addImm(OffsetImm)
            .cloneMergedMemRefs({&*I, &*MergeMI})
            .setMIFlags(I->mergeFlagsWith(*MergeMI));

  LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n    ");
  // ... (debug printing elided) ...

  // Erase the old instructions.
  I->eraseFromParent();
  MergeMI->eraseFromParent();
  return NextI;
}
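Concretely (an editor's sketch, not from the source), merging two adjacent halfword zero stores produces one word store; the merged offset must be expressible in the wider scale, which is what the (OffsetImm & 1) == 0 assertion guards:

// Editor's sketch, not part of the source.
//
//   strh wzr, [x0]        // zeroes bytes [0, 2)
//   strh wzr, [x0, #2]    // zeroes bytes [2, 4)
// becomes
//   str  wzr, [x0]        // one store zeroing bytes [0, 4)
//
// getMatchingWideOpcode(AArch64::STRHHui) == AArch64::STRWui; merging two
// 32-bit WZR stores would produce an XZR store instead, hence the
// isNarrowStore(Opc) ? WZR : XZR operand above.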
// Apply Fn to all instructions from MI backwards, up to and including the
// instruction that defines DefReg, or until Limit is reached.
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
                              const TargetRegisterInfo *TRI, unsigned Limit,
                              std::function<bool(MachineInstr &, bool)> &Fn) {
  auto MBB = MI.getParent();
  for (MachineInstr &I :
       instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
    if (!Limit)
      return false;
    --Limit;
    bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
      return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
             TRI->regsOverlap(MOP.getReg(), DefReg);
    });
    if (!Fn(I, isDef))
      return false;
    if (isDef)
      break;
  }
  return true;
}

static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
                                   const TargetRegisterInfo *TRI) {
  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && MOP.isKill())
      Units.removeReg(MOP.getReg());
  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && !MOP.isKill())
      Units.addReg(MOP.getReg());
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator Paired,
                                      const LdStPairFlags &Flags) {
  MachineBasicBlock::iterator NextI = next_nodbg(I, I->getParent()->end());
  if (NextI == Paired)
    NextI = next_nodbg(NextI, NextI->getParent()->end());

  int SExtIdx = Flags.getSExtIdx();
  unsigned Opc = SExtIdx == -1 ? I->getOpcode()
                               : getMatchingNonSExtOpcode(I->getOpcode());
  bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
  int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;

  bool MergeForward = Flags.getMergeForward();
  Optional<MCPhysReg> RenameReg = Flags.getRenameReg();
  if (MergeForward && RenameReg) {
    MCRegister RegToRename = getLdStRegOp(*I).getReg();
    DefinedInBB.addReg(*RenameReg);

    // Return the sub/super-register of *RenameReg matching the width of the
    // register being replaced.
    auto GetMatchingSubReg = [this, RenameReg](MCPhysReg Reg) -> MCPhysReg {
      // ... (scan TRI->sub_and_superregs_inclusive(*RenameReg) for a register
      // of the same minimal class as Reg; elided) ...
    };

    // Rename the definition that produces RegToRename's value and all its
    // uses up to (and including) the first instruction I.
    std::function<bool(MachineInstr &, bool)> UpdateMIs =
        [this, RegToRename, GetMatchingSubReg](MachineInstr &MI, bool IsDef) {
          if (IsDef) {
            bool SeenDef = false;
            for (auto &MOP : MI.operands()) {
              // Rename the first explicit definition and all implicit
              // definitions matching RegToRename.
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  (!SeenDef || (MOP.isDef() && MOP.isImplicit())) &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                MOP.setReg(GetMatchingSubReg(MOP.getReg()));
                SeenDef = true;
              }
            }
          } else {
            for (auto &MOP : MI.operands()) {
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                MOP.setReg(GetMatchingSubReg(MOP.getReg()));
              }
            }
          }
          return true;
        };
    forAllMIsUntilDef(*I, RegToRename, TRI, LdStLimit, UpdateMIs);

    // Debug-build check: the rename register must not be touched between the
    // paired instructions.
    assert(all_of(make_range(std::next(I), std::next(Paired)),
                  [this, RenameReg](const MachineInstr &MI) {
                    return all_of(MI.operands(),
                                  [this, RenameReg](const MachineOperand &MOP) {
                                    return !MOP.isReg() || MOP.isDebug() ||
                                           !MOP.getReg() ||
                                           !TRI->regsOverlap(MOP.getReg(),
                                                             *RenameReg);
                                  });
                  }) &&
           "Rename register used between paired instruction, trashing the "
           "content");
  }

  // When merging, reconcile a scaled and an unscaled offset.
  int PairedOffset = getLdStOffsetOp(*Paired).getImm();
  bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
  if (IsUnscaled != PairedIsUnscaled) {
    int MemSize = TII->getMemScale(*Paired);
    if (PairedIsUnscaled) {
      assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
             "Offset should be a multiple of the stride!");
      PairedOffset /= MemSize;
    } else {
      PairedOffset *= MemSize;
    }
  }

  // Which instruction supplies Rt and which Rt2 depends on the offset order;
  // if the order swaps, the index of the sign-extended result flips with it.
  int Offset = getLdStOffsetOp(*I).getImm();
  MachineBasicBlock::iterator RtMI = Paired, Rt2MI = I;
  if (Offset == PairedOffset + OffsetStride &&
      !AArch64InstrInfo::isPreLdSt(*I)) {
    RtMI = I;
    Rt2MI = Paired;
    SExtIdx = (SExtIdx + 1) % 2;
  }
  int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
  // Scale the immediate offset if necessary.
  if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
    assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
           "Unscaled offset cannot be scaled.");
    OffsetImm /= TII->getMemScale(*RtMI);
  }

  // Construct the new paired instruction.
  // ... (selection of InsertionPoint, RegOp0/RegOp1 and BaseRegOp elided) ...
  if (RegOp0.isUse()) {
    if (!MergeForward) {
      // Clear kill flags on the stored register, which stays at its original
      // position.
      // ...
    }
  }
  MachineInstrBuilder MIB;
  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc)))
            .add(RegOp0)
            .add(RegOp1)
            .add(BaseRegOp)
            .addImm(OffsetImm)
            .cloneMergedMemRefs({&*I, &*Paired})
            .setMIFlags(I->mergeFlagsWith(*Paired));

  LLVM_DEBUG(
      dbgs() << "Creating pair load/store. Replacing instructions:\n    ");
  // ... (debug printing elided) ...

  if (SExtIdx != -1) {
    // The pair was emitted in the non-extending form; re-introduce the sign
    // extension: mark the 32-bit result with KILL and extend in place with
    // SBFMXri (i.e. sxtw).
    // ... (DstRegW/DstRegX selection elided) ...
    BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
        .addReg(DstRegW)
        .addReg(DstRegX, RegState::Define);
    BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
        .addReg(DstRegX)
        .addImm(0)
        .addImm(31);
  }

  if (MergeForward)
    for (const MachineOperand &MOP : phys_regs_and_masks(*I))
      if (MOP.isReg() && MOP.isKill())
        DefinedInBB.addReg(MOP.getReg());

  // Erase the old instructions.
  I->eraseFromParent();
  Paired->eraseFromParent();
  return NextI;
}
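For the sign-extending case, an editor's sketch of the shape of the output: the pair is emitted in the non-extending 32-bit form and the extension is re-materialized on the affected result with SBFMXri, which is sxtw:

// Editor's sketch, not part of the source.
//
//   ldrsw x1, [x0, #4]        ldp  w1, w2, [x0, #4]
//   ldr   w2, [x0, #8]   ==>  sbfm x1, x1, #0, #31   // i.e. sxtw x1, w1
//
// SExtIdx records which of the two results (0 or 1) needs the re-extension;
// the (SExtIdx + 1) % 2 flip above accounts for the operands being emitted in
// swapped order when the second instruction has the lower offset.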
MachineBasicBlock::iterator
AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                                          MachineBasicBlock::iterator StoreI) {
  MachineBasicBlock::iterator NextI =
      next_nodbg(LoadI, LoadI->getParent()->end());

  int LoadSize = TII->getMemScale(*LoadI);
  int StoreSize = TII->getMemScale(*StoreI);
  Register LdRt = getLdStRegOp(*LoadI).getReg();
  Register StRt = getLdStRegOp(*StoreI).getReg();
  bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);

  assert((IsStoreXReg ||
          TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
         "Unexpected RegClass");

  MachineInstr *BitExtMI;
  if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
    // Remove the load if its destination is the same register as the stored
    // value.
    if (StRt == LdRt && LoadSize == 8) {
      for (MachineInstr &MI :
           make_range(StoreI->getIterator(), LoadI->getIterator())) {
        if (MI.killsRegister(StRt, TRI)) {
          MI.clearRegisterKills(StRt, TRI);
          break;
        }
      }
      LoadI->eraseFromParent();
      return NextI;
    }
    // Replace the load with a mov to the load's destination register.
    BitExtMI =
        BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
            .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
            .addReg(StRt)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
  } else {
    // FIXME: Currently this transformation is disabled on big-endian targets.
    if (!Subtarget->isLittleEndian())
      return NextI;
    bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
    assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
           "Unsupported ld/st match");
    assert(LoadSize <= StoreSize && "Invalid load size");
    int UnscaledLdOffset = IsUnscaled
                               ? getLdStOffsetOp(*LoadI).getImm()
                               : getLdStOffsetOp(*LoadI).getImm() * LoadSize;
    int UnscaledStOffset = IsUnscaled
                               ? getLdStOffsetOp(*StoreI).getImm()
                               : getLdStOffsetOp(*StoreI).getImm() * StoreSize;
    int Width = LoadSize * 8;
    Register DestReg =
        IsStoreXReg ? Register(TRI->getMatchingSuperReg(
                          LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
                    : LdRt;

    assert((UnscaledLdOffset >= UnscaledStOffset &&
            (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
           "Invalid offset");

    int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
    int Imms = Immr + Width - 1;
    if (UnscaledLdOffset == UnscaledStOffset) {
      uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
                                | (Immr << 6)                 // immr
                                | (Imms << 0);                // imms
      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
                  DestReg)
              .addReg(StRt)
              .addImm(AndMaskEncoded);
    } else {
      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
                  DestReg)
              .addReg(StRt)
              .addImm(Immr)
              .addImm(Imms);
    }
  }

  // Clear kill flags between the store and the load.
  for (MachineInstr &MI :
       make_range(StoreI->getIterator(), BitExtMI->getIterator()))
    if (MI.killsRegister(StRt, TRI)) {
      MI.clearRegisterKills(StRt, TRI);
      break;
    }

  LoadI->eraseFromParent();
  return NextI;
}
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair instructions.
  if (IsUnscaled) {
    // If the byte-offset isn't a multiple of the stride, there's no point in
    // trying to match it.
    if (Offset % OffsetStride)
      return false;
    Offset /= OffsetStride;
  }
  return Offset <= 63 && Offset >= -64;
}

// Alignment, specialized to powers of 2 and signed ints.
static int alignTo(int Num, int PowOf2) {
  return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}
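Worked example of the range check (an editor's illustration): the pair immediate is a signed 7-bit value in units of the access size, so after scaling it must land in [-64, 63]:

// For 8-byte accesses (OffsetStride == 8 when unscaled):
//   Offset = 504: 504 % 8 == 0 and 504 / 8 == 63  -> in [-64, 63], pairable
//   Offset = 512: 512 % 8 == 0 but 512 / 8 == 64  -> out of range, rejected
//   Offset = 260: 260 % 8 != 0                    -> not expressible, rejected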
bool AArch64LoadStoreOpt::findMatchingStore(
    MachineBasicBlock::iterator I, unsigned Limit,
    MachineBasicBlock::iterator &StoreI) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator MBBI = I;
  MachineInstr &LoadMI = *I;
  Register BaseReg = getLdStBaseOp(LoadMI).getReg();

  // Track which register units have been modified and used between the first
  // insn and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  unsigned Count = 0;
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // If the load reads directly from the address the store writes to and the
    // stored value has not been modified, the load can be promoted.
    if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
        BaseReg == getLdStBaseOp(MI).getReg() &&
        isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
        ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
      StoreI = MBBI;
      return true;
    }

    if (MI.isCall())
      return false;

    // Update modified / used register units.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (!ModifiedRegUnits.available(BaseReg))
      return false;
  } while (MBBI != B && Count < Limit);
  return false;
}
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
                                       LdStPairFlags &Flags,
                                       const AArch64InstrInfo *TII) {
  // If this is volatile or if pairing is suppressed, not a candidate.
  if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
    return false;

  // We should have already checked FirstMI for pair suppression and
  // volatility.
  assert(!FirstMI.hasOrderedMemoryRef() &&
         !TII->isLdStPairSuppressed(FirstMI) &&
         "FirstMI shouldn't get here if either of these checks are true.");

  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  // Opcodes match: if the opcodes are pre ld/st there is nothing more to
  // check.
  if (OpcA == OpcB)
    return !AArch64InstrInfo::isPreLdSt(FirstMI);

  // Try to match a sign-extended load/store with a zero-extended load/store.
  bool IsValidLdStrOpc, PairIsValidLdStrOpc;
  unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
  assert(IsValidLdStrOpc &&
         "Given Opc should be a Load or Store with an immediate");
  // OpcA will be the first instruction in the pair.
  if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
    Flags.setSExtIdx(NonSExtOpc == (unsigned)OpcA ? 1 : 0);
    return true;
  }

  // If the second instruction isn't even a mergable/pairable load/store, bail.
  if (!PairIsValidLdStrOpc)
    return false;

  // FIXME: We don't support merging narrow stores with mixed scaled/unscaled
  // offsets.
  if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
    return false;

  // The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and LDR<S,D,Q,W,X>pre -
  // LDR<S,D,Q,W,X>ui pairs are candidates to be merged.
  if (isPreLdStPairCandidate(FirstMI, MI))
    return true;

  // Try to match an unscaled load/store with a scaled load/store.
  return TII->hasUnscaledLdStOffset(OpcA) !=
             TII->hasUnscaledLdStOffset(OpcB) &&
         getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB);
}
static bool canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
                             SmallPtrSetImpl<const TargetRegisterClass *>
                                 &RequiredClasses,
                             const TargetRegisterInfo *TRI) {
  if (!FirstMI.mayStore())
    return false;

  Register RegToRename = getLdStRegOp(FirstMI).getReg();
  // For now, only rename if the store's value operand is killed at the store.
  if (!getLdStRegOp(FirstMI).isKill() &&
      !any_of(FirstMI.operands(),
              [TRI, RegToRename](const MachineOperand &MOP) {
                return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                       MOP.isImplicit() && MOP.isKill() &&
                       TRI->regsOverlap(RegToRename, MOP.getReg());
              })) {
    LLVM_DEBUG(dbgs() << "  Operand not killed at " << FirstMI << "\n");
    return false;
  }

  auto canRenameMOP = [TRI](const MachineOperand &MOP) {
    if (MOP.isReg()) {
      auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
      // Renaming registers with multiple disjunct sub-registers (e.g. the
      // result of a LD3) means all sub-registers are renamed, potentially
      // impacting instructions we did not check. Bail out.
      if (RegClass->HasDisjunctSubRegs) {
        LLVM_DEBUG(
            dbgs()
            << "  Cannot rename operands with multiple disjunct subregisters ("
            << MOP << ")\n");
        return false;
      }
    }
    return MOP.isImplicit() ||
           (MOP.isRenamable() && !MOP.isEarlyClobber());
  };

  bool FoundDef = false;

  // For each instruction between FirstMI and the previous def of RegToRename,
  // check that all operands of RegToRename can be renamed, and collect the
  // register classes they must fit.
  std::function<bool(MachineInstr &, bool)> CheckMIs =
      [&](MachineInstr &MI, bool IsDef) {
        // Currently we do not try to rename across frame-setup instructions.
        if (MI.getFlag(MachineInstr::FrameSetup)) {
          LLVM_DEBUG(dbgs()
                     << "  Cannot rename framesetup instructions currently ("
                     << MI << ")\n");
          return false;
        }
        if (MI.isPseudo()) {
          LLVM_DEBUG(dbgs() << "  Cannot rename pseudo instruction " << MI
                            << "\n");
          return false;
        }
        if (IsDef) {
          FoundDef = true;
          for (auto &MOP : MI.operands()) {
            if (!MOP.isReg() || !MOP.getReg() ||
                !TRI->regsOverlap(MOP.getReg(), RegToRename))
              continue;
            if (!canRenameMOP(MOP)) {
              LLVM_DEBUG(dbgs()
                         << "  Cannot rename " << MOP << " in " << MI << "\n");
              return false;
            }
            RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
          }
        } else {
          for (auto &MOP : MI.operands()) {
            if (!MOP.isReg() || !MOP.getReg() ||
                !TRI->regsOverlap(MOP.getReg(), RegToRename))
              continue;
            if (!canRenameMOP(MOP)) {
              LLVM_DEBUG(dbgs()
                         << "  Cannot rename " << MOP << " in " << MI << "\n");
              return false;
            }
            RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
          }
        }
        return true;
      };

  if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
    return false;

  if (!FoundDef) {
    LLVM_DEBUG(dbgs() << "  Did not find definition for register in BB\n");
    return false;
  }
  return true;
}

static Optional<MCPhysReg> tryToFindRegisterToRename(
    const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  const MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Checks if any sub- or super-register of PR is callee saved.
  auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
    return any_of(TRI->sub_and_superregs_inclusive(PR),
                  [&MF, TRI](MCPhysReg SubOrSuper) {
                    return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
                  });
  };

  // Check if PR (or one of its sub- or super-registers) can be used for all
  // required register classes.
  auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
    return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
      return any_of(TRI->sub_and_superregs_inclusive(PR),
                    [C, TRI](MCPhysReg SubOrSuper) {
                      return C == TRI->getMinimalPhysRegClass(SubOrSuper);
                    });
    });
  };

  auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
  for (const MCPhysReg &PR : *RegClass) {
    if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
        !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
        CanBeUsedForAllClasses(PR)) {
      DefinedInBB.addReg(PR);
      LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
                        << "\n");
      return {PR};
    }
  }
  LLVM_DEBUG(dbgs() << "No rename register found from "
                    << TRI->getRegClassName(RegClass) << "\n");
  return None;
}
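The situation this machinery targets (an editor's sketch, not from the source): both stores use the same register with different values, so pairing at the second store would lose the first value; renaming the first value's def and uses to a free register makes the pair legal:

// Editor's sketch, not part of the source.
//
//   mov w1, #5            mov w5, #5        // def of the first value renamed
//   str w1, [x0]          str w5, [x0]      // ...together with its use
//   mov w1, #1       ==>  mov w1, #1        // clobber no longer interferes
//   str w1, [x0, #4]      str w1, [x0, #4]  // now pairable: stp w5, w1, [x0]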
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
                                      LdStPairFlags &Flags, unsigned Limit,
                                      bool FindNarrowMerge) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator MBBI = I;
  MachineBasicBlock::iterator MBBIWithRenameReg;
  MachineInstr &FirstMI = *I;
  MBBI = next_nodbg(MBBI, E);

  bool MayLoad = FirstMI.mayLoad();
  bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
  Register Reg = getLdStRegOp(FirstMI).getReg();
  Register BaseReg = getLdStBaseOp(FirstMI).getReg();
  int Offset = getLdStOffsetOp(FirstMI).getImm();
  int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
  bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);

  Optional<bool> MaybeCanRename;
  if (!EnableRenaming)
    MaybeCanRename = {false};

  SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
  LiveRegUnits UsedInBetween;
  UsedInBetween.init(*TRI);

  Flags.clearRenameReg();

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  // Remember any instructions that read/write memory between FirstMI and MI.
  SmallVector<MachineInstr *, 4> MemInsns;

  for (unsigned Count = 0; MBBI != E && Count < Limit;
       MBBI = next_nodbg(MBBI, E)) {
    MachineInstr &MI = *MBBI;

    UsedInBetween.accumulate(MI);

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    Flags.setSExtIdx(-1);
    if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
        getLdStOffsetOp(MI).isImm()) {
      assert(MI.mayLoadOrStore() && "Expected memory operation.");
      // Check whether the base and offset of MI are compatible with FirstMI.
      Register MIBaseReg = getLdStBaseOp(MI).getReg();
      int MIOffset = getLdStOffsetOp(MI).getImm();
      bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
      if (IsUnscaled != MIIsUnscaled) {
        // We're trying to pair instructions that differ in how they are
        // scaled. If FirstMI is scaled then scale the offset of MI
        // accordingly. Otherwise, do the opposite (i.e., make MI's offset
        // unscaled).
        int MemSize = TII->getMemScale(MI);
        if (MIIsUnscaled) {
          // If the unscaled offset isn't a multiple of the MemSize, we can't
          // pair the operations together: bail and keep looking.
          if (MIOffset % MemSize) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
          MIOffset /= MemSize;
        } else {
          MIOffset *= MemSize;
        }
      }

      bool IsPreLdSt = isPreLdStPairCandidate(FirstMI, MI);

      if (BaseReg == MIBaseReg) {
        // If this is a pre-ld/st pair candidate: the offset of the second
        // instruction must equal the access size, and the base register must
        // be neither used nor modified in between.
        if (IsPreLdSt) {
          bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
          bool IsBaseRegUsed =
              !UsedRegUnits.available(getLdStBaseOp(MI).getReg());
          bool IsBaseRegModified =
              !ModifiedRegUnits.available(getLdStBaseOp(MI).getReg());
          // If the stored value and the address of the second instruction are
          // the same, the update cannot be folded.
          bool IsMIRegTheSame = TRI->regsOverlap(getLdStRegOp(MI).getReg(),
                                                 getLdStBaseOp(MI).getReg());
          if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
              IsMIRegTheSame) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        } else {
          // Otherwise the offsets must be adjacent (+/- one stride).
          if ((Offset != MIOffset + OffsetStride) &&
              (Offset + OffsetStride != MIOffset)) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        }

        int MinOffset = Offset < MIOffset ? Offset : MIOffset;
        if (FindNarrowMerge) {
          // If the alignment requirements of the wide store can't express the
          // offset of the narrow input, or the stored registers differ for a
          // promotable zero store, bail and keep looking.
          if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
              (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        } else {
          // Pairwise instructions have a 7-bit signed offset field. If the
          // resultant immediate offset is out of range, bail and keep looking.
          if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
          // If the alignment requirements of the paired (scaled) instruction
          // can't express the offset of the unscaled input, bail and keep
          // looking.
          if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        }

        // A load-pair with both destination registers the same is
        // UNPREDICTABLE; bail if the destinations overlap.
        if (MayLoad &&
            TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg())) {
          LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                            TRI);
          MemInsns.push_back(&MI);
          continue;
        }

        // If the BaseReg has been modified, we cannot do the optimization.
        if (!ModifiedRegUnits.available(BaseReg))
          return E;

        // If the Rt of the second instruction was not modified or used between
        // the two instructions and none of the instructions in between alias
        // with the second, we can combine the second into the first.
        if (ModifiedRegUnits.available(getLdStRegOp(MI).getReg()) &&
            !(MI.mayLoad() &&
              !UsedRegUnits.available(getLdStRegOp(MI).getReg())) &&
            !mayAlias(MI, MemInsns, AA)) {
          Flags.setMergeForward(false);
          Flags.clearRenameReg();
          return MBBI;
        }

        // Likewise, if the Rt of the first instruction is not modified or used
        // between the two instructions and none of the instructions in between
        // alias with the first, we can combine the first into the second.
        if (!(MayLoad &&
              !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) &&
            !mayAlias(FirstMI, MemInsns, AA)) {
          if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
            Flags.setMergeForward(true);
            Flags.clearRenameReg();
            return MBBI;
          }

          if (DebugCounter::shouldExecute(RegRenamingCounter)) {
            if (!MaybeCanRename)
              MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween,
                                                 RequiredClasses, TRI)};

            if (*MaybeCanRename) {
              Optional<MCPhysReg> MaybeRenameReg = tryToFindRegisterToRename(
                  *FirstMI.getParent()->getParent(), Reg, DefinedInBB,
                  UsedInBetween, RequiredClasses, TRI);
              if (MaybeRenameReg) {
                Flags.setRenameReg(*MaybeRenameReg);
                Flags.setMergeForward(true);
                MBBIWithRenameReg = MBBI;
              }
            }
          }
        }
        // Unable to combine these instructions due to interference in between.
        // Keep looking.
      }
    }

    if (Flags.getRenameReg())
      return MBBIWithRenameReg;

    // If the instruction wasn't a matching load or store, stop searching if we
    // encounter a call instruction that might modify memory.
    if (MI.isCall())
      return E;

    // Update modified / used register units.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (!ModifiedRegUnits.available(BaseReg))
      return E;

    // Update the list of instructions that read/write memory.
    if (MI.mayLoadOrStore())
      MemInsns.push_back(&MI);
  }
  return E;
}
static MachineBasicBlock::iterator
maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {
  auto End = MI.getParent()->end();
  if (MaybeCFI == End ||
      MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
      !(MI.getFlag(MachineInstr::FrameSetup) ||
        MI.getFlag(MachineInstr::FrameDestroy)) ||
      getLdStBaseOp(MI).getReg() != AArch64::SP)
    return End;

  const MachineFunction &MF = *MI.getParent()->getParent();
  unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
  const MCCFIInstruction &CFI = MF.getFrameInstructions()[CFIIndex];
  switch (CFI.getOperation()) {
  case MCCFIInstruction::OpDefCfa:
  case MCCFIInstruction::OpDefCfaOffset:
    return MaybeCFI;
  default:
    return End;
  }
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
                                     MachineBasicBlock::iterator Update,
                                     bool IsPreIdx) {
  assert((Update->getOpcode() == AArch64::ADDXri ||
          Update->getOpcode() == AArch64::SUBXri) &&
         "Unexpected base register update instruction to merge!");
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);

  // If updating the SP and the following instruction is a CFA-offset-related
  // CFI instruction, move it after the merged instruction.
  MachineBasicBlock::iterator CFI =
      IsPreIdx ? maybeMoveCFI(*Update, next_nodbg(Update, E)) : E;

  // Return the instruction following the merged instruction, which is the
  // instruction following the update, unless the update is the instruction
  // after I, in which case it is the instruction after that.
  if (NextI == Update)
    NextI = next_nodbg(NextI, E);

  int Value = Update->getOperand(2).getImm();
  assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
         "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
  if (Update->getOpcode() == AArch64::SUBXri)
    Value = -Value;

  unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
                             : getPostIndexedOpcode(I->getOpcode());
  MachineInstrBuilder MIB;
  int Scale, MinOffset, MaxOffset;
  getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
  if (!isPairedLdSt(*I)) {
    // Non-paired instruction.
    MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
              .add(getLdStRegOp(*Update))
              .add(getLdStRegOp(*I))
              .add(getLdStBaseOp(*I))
              .addImm(Value / Scale)
              .setMemRefs(I->memoperands())
              .setMIFlags(I->mergeFlagsWith(*Update));
  } else {
    // Paired instruction.
    MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
              .add(getLdStRegOp(*Update))
              .add(getLdStRegOp(*I, 0))
              .add(getLdStRegOp(*I, 1))
              .add(getLdStBaseOp(*I))
              .addImm(Value / Scale)
              .setMemRefs(I->memoperands())
              .setMIFlags(I->mergeFlagsWith(*Update));
  }
  if (CFI != E) {
    MachineBasicBlock *MBB = I->getParent();
    MBB->splice(std::next(MIB.getInstr()->getIterator()), MBB, CFI);
  }

  if (IsPreIdx)
    ++NumPreFolded;
  else
    ++NumPostFolded;
  // ... (debug printing elided) ...

  // Erase the old instructions for the block.
  I->eraseFromParent();
  Update->eraseFromParent();
  return NextI;
}
bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
                                               MachineInstr &MI,
                                               unsigned BaseReg, int Offset) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBXri:
  case AArch64::ADDXri:
    // Make sure it's a vanilla immediate operand, not a relocation or
    // anything else we can't handle.
    if (!MI.getOperand(2).isImm())
      break;
    // Watch out for 1 << 12 shifted values.
    if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
      break;

    // The update instruction's source and destination register must be the
    // same as the load/store's base register.
    if (MI.getOperand(0).getReg() != BaseReg ||
        MI.getOperand(1).getReg() != BaseReg)
      break;

    int UpdateOffset = MI.getOperand(2).getImm();
    if (MI.getOpcode() == AArch64::SUBXri)
      UpdateOffset = -UpdateOffset;

    // The immediate must be a multiple of the scaling factor of the pre/post
    // indexed instruction.
    int Scale, MinOffset, MaxOffset;
    getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
    if (UpdateOffset % Scale != 0)
      break;

    // Scaled offset must fit in the instruction immediate range.
    int ScaledOffset = UpdateOffset / Scale;
    if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
      break;

    // If we have a non-zero Offset, check that it matches the amount we're
    // adding to the register.
    if (!Offset || Offset == UpdateOffset)
      return true;
    break;
  }
  return false;
}
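Worked example of the fold test (an editor's illustration), for a paired LDPXi, where getPrePostIndexedMemOpInfo yields Scale = 8 and a range of [-64, 63]:

//   add x0, x0, #16 -> UpdateOffset = 16,  16 % 8 == 0, 16 / 8 == 2  : folds
//   sub x0, x0, #16 -> UpdateOffset = -16, scaled to -2, in range    : folds
//   add x0, x0, #12 -> 12 % 8 != 0                                   : rejected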
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
    MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr &MemMI = *I;
  MachineBasicBlock::iterator MBBI = I;

  Register BaseReg = getLdStBaseOp(MemMI).getReg();
  int MIUnscaledOffset =
      getLdStOffsetOp(MemMI).getImm() * TII->getMemScale(MemMI);

  // Scan forward looking for post-index opportunities. Updating instructions
  // can't be formed if the memory instruction doesn't have the offset we're
  // looking for.
  if (MIUnscaledOffset != UnscaledOffset)
    return E;

  // If the base register overlaps a source/destination register, we can't
  // merge the update. This does not apply to tag store instructions, which
  // ignore the address part of the source register.
  if (!isTagStore(MemMI)) {
    bool IsPairedInsn = isPairedLdSt(MemMI);
    for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
      Register DestReg = getLdStRegOp(MemMI, i).getReg();
      if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
        return E;
    }
  }

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  MBBI = next_nodbg(MBBI, E);

  // We can't post-increment the stack pointer if any instruction between the
  // memory access (I) and the increment (MBBI) can access the memory region
  // defined by [SP, MBBI].
  const bool BaseRegSP = BaseReg == AArch64::SP;
  if (BaseRegSP && needsWinCFI(I->getMF())) {
    // FIXME: The optimization is blocked over SP on windows targets, as it
    // would require adjusting the unwind/debug info.
    return E;
  }

  for (unsigned Count = 0; MBBI != E && Count < Limit;
       MBBI = next_nodbg(MBBI, E)) {
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // If we found a match, return it.
    if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
      return MBBI;

    // Update the status of what the instruction clobbered and used.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is used or modified, we have no match,
    // so return early. If we are optimizing SP, do not allow instructions that
    // may load or store in between.
    if (!ModifiedRegUnits.available(BaseReg) ||
        !UsedRegUnits.available(BaseReg) ||
        (BaseRegSP && MBBI->mayLoadOrStore()))
      return E;
  }
  return E;
}
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
    MachineBasicBlock::iterator I, unsigned Limit) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr &MemMI = *I;
  MachineBasicBlock::iterator MBBI = I;
  MachineFunction &MF = *MemMI.getMF();

  Register BaseReg = getLdStBaseOp(MemMI).getReg();
  int Offset = getLdStOffsetOp(MemMI).getImm();

  // If the load/store is the first instruction in the block, there's obviously
  // not any matching update. Ditto if the memory offset isn't zero.
  if (MBBI == B || Offset != 0)
    return E;
  // If the base register overlaps a destination register, we can't merge the
  // update.
  if (!isTagStore(MemMI)) {
    bool IsPairedInsn = isPairedLdSt(MemMI);
    for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
      Register DestReg = getLdStRegOp(MemMI, i).getReg();
      if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
        return E;
    }
  }

  const bool BaseRegSP = BaseReg == AArch64::SP;
  if (BaseRegSP && needsWinCFI(I->getMF())) {
    // FIXME: The optimization is blocked over SP on windows targets, as it
    // would require adjusting the unwind/debug info.
    return E;
  }

  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  unsigned RedZoneSize =
      Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  unsigned Count = 0;
  bool MemAcessBeforeSPPreInc = false;
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // If we found a match, return it.
    if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
      // Check that the update value is within our red zone limit (which may
      // be zero).
      if (MemAcessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)
        return E;
      return MBBI;
    }

    // Update the status of what the instruction clobbered and used.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is used or modified, we have no match,
    // so return early.
    if (!ModifiedRegUnits.available(BaseReg) ||
        !UsedRegUnits.available(BaseReg))
      return E;
    // Keep track of memory accesses before an SP pre-increment; in that case
    // we must validate later that the update amount respects the red zone.
    if (BaseRegSP && MBBI->mayLoadOrStore())
      MemAcessBeforeSPPreInc = true;
  } while (MBBI != B && Count < Limit);
  return E;
}
bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
    MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  // If this is a volatile load, don't mess with it.
  if (MI.hasOrderedMemoryRef())
    return false;

  // Make sure this is a reg+imm.
  // FIXME: It is possible to extend it to handle reg+reg cases.
  if (!getLdStOffsetOp(MI).isImm())
    return false;

  // Look backward up to LdStLimit instructions.
  MachineBasicBlock::iterator StoreI;
  if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
    ++NumLoadsFromStoresPromoted;
    // Promote the load. Keeping the iterator straight is a pain, so we let
    // the merge routine tell us what the next instruction is after it's done
    // mucking about.
    MBBI = promoteLoadFromStore(MBBI, StoreI);
    return true;
  }
  return false;
}

// Merge adjacent zero stores into a wider store.
bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
    MachineBasicBlock::iterator &MBBI) {
  assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();

  if (!TII->isCandidateToMergeOrPair(MI))
    return false;

  // Look ahead up to LdStLimit instructions for a mergeable instruction.
  LdStPairFlags Flags;
  MachineBasicBlock::iterator MergeMI =
      findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
  if (MergeMI != E) {
    ++NumZeroStoresPromoted;
    // Keeping the iterator straight is a pain, so we let the merge routine
    // tell us what the next instruction is after it's done mucking about.
    MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
    return true;
  }
  return false;
}

// Find loads and stores that can be merged into a single load or store pair
// instruction.
bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();

  if (!TII->isCandidateToMergeOrPair(MI))
    return false;

  // Early exit if the offset is not possible to match (6 bits of positive
  // range, plus allow an extra one in case we find a later insn that matches
  // with Offset-1).
  bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
  int Offset = getLdStOffsetOp(MI).getImm();
  int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
  // Allow one more for offset.
  if (Offset > 0)
    Offset -= OffsetStride;
  if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
    return false;

  // Look ahead up to LdStLimit instructions for a pairable instruction.
  LdStPairFlags Flags;
  MachineBasicBlock::iterator Paired =
      findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
  if (Paired != E) {
    ++NumPairCreated;
    if (TII->hasUnscaledLdStOffset(MI))
      ++NumUnscaledPairCreated;
    // Keeping the iterator straight is a pain, so we let the merge routine
    // tell us what the next instruction is after it's done mucking about.
    auto Prev = std::prev(MBBI);
    MBBI = mergePairedInsns(MBBI, Paired, Flags);
    // Collect liveness info for instructions between Prev and the new
    // position MBBI.
    for (auto I = std::next(Prev); I != MBBI; I++)
      updateDefinedRegisters(*I, DefinedInBB, TRI);
    return true;
  }
  return false;
}

bool AArch64LoadStoreOpt::tryToMergeLdStUpdate(
    MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();
  MachineBasicBlock::iterator Update;

  // Look forward to try to form a post-index instruction. For example,
  //   ldr x0, [x20]
  //   add x20, x20, #32
  // is merged into:
  //   ldr x0, [x20], #32
  Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
  if (Update != E) {
    // Merge the update into the ld/st.
    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
    return true;
  }

  // Don't know how to handle unscaled pre/post-index versions below, so bail.
  if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
    return false;

  // Look back to try to find a pre-index instruction. For example,
  //   add x0, x0, #8
  //   ldr x1, [x0]
  // is merged into:
  //   ldr x1, [x0, #8]!
  Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
  if (Update != E) {
    // Merge the update into the ld/st.
    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
    return true;
  }

  // The immediate in the load/store is scaled by the size of the memory
  // operation. The immediate in the add we're looking for, however, is not,
  // so adjust here.
  int UnscaledOffset = getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);

  // Look forward to try to find a pre-index instruction. For example,
  //   ldr x1, [x0, #64]
  //   add x0, x0, #64
  // is merged into:
  //   ldr x1, [x0, #64]!
  Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
  if (Update != E) {
    // Merge the update into the ld/st.
    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
    return true;
  }

  return false;
}
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
                                        bool EnableNarrowZeroStOpt) {
  bool Modified = false;
  // Four transformations to try:
  // 1) Find loads that directly read from stores and promote them by
  //    replacing them with mov instructions.
  // ... (step 1 elided) ...

  // 2) Merge adjacent zero stores into a wider store.
  if (EnableNarrowZeroStOpt)
    for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
         MBBI != E;) {
      if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
        Modified = true;
      else
        ++MBBI;
    }

  // 3) Find loads and stores that can be merged into a single load or store
  //    pair instruction.
  if (MBB.getParent()->getRegInfo().tracksLiveness()) {
    DefinedInBB.clear();
    DefinedInBB.addLiveIns(MBB);
  }
  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    // Track currently live registers up to this point, to help with searching
    // for a rename register on demand.
    updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
    if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
      Modified = true;
    else
      ++MBBI;
  }

  // 4) Find base register updates that can be merged into the load or store
  //    as a base-reg writeback.
  // ... (step 4 elided) ...
  return Modified;
}

bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(Fn.getFunction()))
    return false;

  Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
  TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
  TRI = Subtarget->getRegisterInfo();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  ModifiedRegUnits.init(*TRI);
  UsedRegUnits.init(*TRI);
  DefinedInBB.init(*TRI);

  bool Modified = false;
  bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
  for (auto &MBB : Fn) {
    auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
    Modified |= M;
  }
  return Modified;
}

FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
  return new AArch64LoadStoreOpt();
}
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
This is an optimization pass for GlobalISel generic memory operations.
static bool isMergeableLdStUpdate(MachineInstr &MI)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
@ Define
Register definition.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static void accumulateUsedDefed(const MachineInstr &MI, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI)
For a machine instruction MI, adds all register units used in UsedRegUnits and defined or clobbered i...
const MachineInstrBuilder & add(const MachineOperand &MO) const
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
void setIsKill(bool Val=true)
static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units, const TargetRegisterInfo *TRI)
#define AARCH64_LOAD_STORE_OPT_NAME
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
bool available(MCPhysReg Reg) const
Returns true if no part of physical register Reg is live.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
void addReg(MCPhysReg Reg)
Adds register units covered by physical register Reg.
const AArch64InstrInfo * getInstrInfo() const override
detail::concat_range< const MCPhysReg, iterator_range< mc_subreg_iterator >, iterator_range< mc_superreg_iterator > > sub_and_superregs_inclusive(MCRegister Reg) const
Return an iterator range over all sub- and super-registers of Reg, including Reg.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
static bool isPromotableZeroStoreInst(MachineInstr &MI)
static unsigned getMatchingWideOpcode(unsigned Opc)
Properties which a MachineFunction may have at a given point in time.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
unsigned const TargetRegisterInfo * TRI
FunctionPass * createAArch64LoadStoreOptimizationPass()
createAArch64LoadStoreOptimizationPass - returns an instance of the load / store optimization pass.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const AArch64TargetLowering * getTargetLowering() const override
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt", AARCH64_LOAD_STORE_OPT_NAME, false, false)
static bool isNarrowStore(unsigned Opc)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isRenamable() const
isRenamable - Returns true if this register may be renamed, i.e.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void accumulate(const MachineInstr &MI)
Adds all register units used, defined or clobbered in MI.
const MachineOperand & getOperand(unsigned i) const
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
static unsigned getPostIndexedOpcode(unsigned Opc)
bool mayAlias(AAResults *AA, const MachineInstr &Other, bool UseTBAA) const
Returns true if this instruction's memory access aliases the memory access of Other.
Represent the analysis usage information of a pass.
iterator_range< filter_iterator< ConstMIBundleOperands, std::function< bool(const MachineOperand &)> > > phys_regs_and_masks(const MachineInstr &MI)
Returns an iterator range over all physical register and mask operands for MI and bundled instruction...
MachineOperand class - Representation of each machine instruction operand.
MachineFunctionProperties & set(Property P)
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static bool shouldExecute(unsigned CounterName)
static cl::opt< unsigned > LdStLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden)
STATISTIC(NumFunctions, "Total number of functions")
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg, const TargetRegisterInfo *TRI, unsigned Limit, std::function< bool(MachineInstr &, bool)> &Fn)
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
OpType getOperation() const
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, LdStPairFlags &Flags, const AArch64InstrInfo *TII)
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale, int &MinOffset, int &MaxOffset)
A set of register units used to track register liveness.
DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming", "Controls which pairs are considered for renaming")
static bool isPairedLdSt(const MachineInstr &MI)
void removeReg(MCPhysReg Reg)
Removes all register units covered by physical register Reg.
static cl::opt< bool > EnableRenaming("aarch64-load-store-renaming", cl::init(true), cl::Hidden)
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI)
static bool needsWinCFI(const MachineFunction *MF)
static cl::opt< unsigned > UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
An instruction for storing to memory.
bool regsOverlap(Register RegA, Register RegB) const
Returns true if the two registers are equal or alias each other.
const TargetRegisterClass * getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
reverse_instr_iterator instr_rend()
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Representation of each machine instruction.
static bool isTagStore(const MachineInstr &MI)
const char * getRegClassName(const TargetRegisterClass *Class) const
Returns the name of the register class.
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
static unsigned getPreIndexedOpcode(unsigned Opc)
MCRegister getMatchingSuperReg(MCRegister Reg, unsigned SubIdx, const TargetRegisterClass *RC) const
Return a super-register of the specified register Reg so its sub-register of index SubIdx is Reg.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
initializer< Ty > init(const Ty &Val)
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
bool isEarlyClobber() const
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
static bool canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static MachineBasicBlock::iterator maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI)
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Register getReg() const
getReg - Returns the register number.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst, MachineInstr &StoreInst, const AArch64InstrInfo *TII)
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
StringRef - Represent a constant reference to a string, i.e.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
self_iterator getIterator()
const MachineBasicBlock * getParent() const
An instruction for reading from memory.
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Wrapper class representing virtual and physical registers.
const AArch64RegisterInfo * getRegisterInfo() const override
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
static bool isPromotableLoadFromStore(MachineInstr &MI)
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
void initializeAArch64LoadStoreOptPass(PassRegistry &)
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
static unsigned getMatchingNonSExtOpcode(unsigned Opc, bool *IsValidLdStrOpc=nullptr)
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
bool isSubRegister(MCRegister RegA, MCRegister RegB) const
Returns true if RegB is a sub-register of RegA.
static unsigned isMatchingStore(MachineInstr &LoadInst, MachineInstr &StoreInst)
void setImplicit(bool Val=true)
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
unsigned getRedZoneSize(const Function &F) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
void setReg(Register Reg)
Change the register this operand corresponds to.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Align max(MaybeAlign Lhs, Align Rhs)
A range adaptor for a pair of iterators.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
FunctionPass class - This class is used to implement most global optimizations.
bool isSuperOrSubRegisterEq(MCRegister RegA, MCRegister RegB) const
Returns true if RegB is a super-register or sub-register of RegA or if RegB == RegA.
AnalysisUsage & addRequired()
const TargetRegisterClass * getMinimalPhysRegClass(MCRegister Reg, MVT VT=MVT::Other) const
Returns the Register Class of a physical register of the given type, picking the most sub register cl...
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
static unsigned getMatchingPairOpcode(unsigned Opc)
bool usesWindowsCFI() const
iterator_range< mop_iterator > operands()
LLVM Value Representation.
static MachineOperand & getLdStRegOp(MachineInstr &MI, unsigned PairedRegOp=0)
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
Wrapper class representing physical registers. Should be passed by value.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
static Optional< MCPhysReg > tryToFindRegisterToRename(const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)