#define DEBUG_TYPE "aarch64-ldst-opt"
STATISTIC(NumPairCreated,
          "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
          "Number of load/store from unscaled generated");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");

// The LdStLimit limits how far we search for load/store pairs.
static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
                                   cl::init(20), cl::Hidden);

// The UpdateLimit limits how far we search for update instructions when we
// form pre-/post-index instructions.
static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
                                     cl::Hidden);

// Enable register renaming to find additional store pairing opportunities.
static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
                                    cl::init(true), cl::Hidden);

DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
              "Controls which pairs are considered for renaming");
#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
using LdStPairFlags = struct LdStPairFlags {
  // If a matching instruction is found, MergeForward is set to true if the
  // merge is to remove the first instruction and replace the second with
  // a pair-wise insert of the two load/stores.
  bool MergeForward = false;

  // SExtIdx gives the index of the result of the load pair that must be
  // extended. The value of SExtIdx assumes that the paired load produces the
  // value in this order: (I, returned iterator), i.e., -1 means no value has
  // to be extended, 0 means I, and 1 means the returned iterator.
  int SExtIdx = -1;

  // If not none, RenameReg can be used to rename the result register of the
  // first store in a pair.
  std::optional<MCPhysReg> RenameReg;

  LdStPairFlags() = default;

  void setMergeForward(bool V = true) { MergeForward = V; }
  bool getMergeForward() const { return MergeForward; }

  void setSExtIdx(int V) { SExtIdx = V; }
  int getSExtIdx() const { return SExtIdx; }

  void setRenameReg(MCPhysReg R) { RenameReg = R; }
  void clearRenameReg() { RenameReg = std::nullopt; }
  std::optional<MCPhysReg> getRenameReg() const { return RenameReg; }
};
  // Scan the instructions looking for a load/store that can be combined
  // with the current instruction into a load/store pair.
  // Return the matching instruction if one is found, else MBB->end().
  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
                                               LdStPairFlags &Flags,
                                               unsigned Limit,
                                               bool FindNarrowMerge);

  // Merge the two instructions indicated into a single wider zero store or a
  // single pair-wise instruction, respectively.
  MachineBasicBlock::iterator
  mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                        MachineBasicBlock::iterator MergeMI,
                        const LdStPairFlags &Flags);
  MachineBasicBlock::iterator
  mergePairedInsns(MachineBasicBlock::iterator I,
                   MachineBasicBlock::iterator Paired,
                   const LdStPairFlags &Flags);

  // Scan the instruction list to find a base register update that can
  // be combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
                                int UnscaledOffset, unsigned Limit);

  // Check if MI is an add/sub of the base register of MemMI that can be
  // folded into a pre/post-indexed form of the memory access.
  bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
                            unsigned BaseReg, int Offset);

  // ...

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }
char AArch64LoadStoreOpt::ID = 0;
static bool isNarrowStore(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
    return true;
  }
}
static bool isTagStore(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::STGi:
  case AArch64::STZGi:
  case AArch64::ST2Gi:
  case AArch64::STZ2Gi:
    return true;
  }
}
static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                         bool *IsValidLdStrOpc = nullptr) {
  if (IsValidLdStrOpc)
    *IsValidLdStrOpc = true;
  switch (Opc) {
  default:
    if (IsValidLdStrOpc)
      *IsValidLdStrOpc = false;
    return std::numeric_limits<unsigned>::max();
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRDpre:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRQpre:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
  case AArch64::STRWui:
  case AArch64::STRWpre:
  case AArch64::STURWi:
  case AArch64::STRXui:
  case AArch64::STRXpre:
  case AArch64::STURXi:
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRDpre:
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDRQpre:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::LDRWpre:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::LDRXpre:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::STRSpre:
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSpre:
    return Opc;
  case AArch64::LDRSWui:
    return AArch64::LDRWui;
  case AArch64::LDURSWi:
    return AArch64::LDURWi;
  }
}
static unsigned getMatchingWideOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no wide equivalent!");
  case AArch64::STRBBui:
    return AArch64::STRHHui;
  case AArch64::STRHHui:
    return AArch64::STRWui;
  case AArch64::STURBBi:
    return AArch64::STURHHi;
  case AArch64::STURHHi:
    return AArch64::STURWi;
  case AArch64::STURWi:
    return AArch64::STURXi;
  case AArch64::STRWui:
    return AArch64::STRXui;
  }
}
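// getMatchingWideOpcode drives the zero-store widening: two adjacent narrow
// zero stores are replaced by one store of twice the width, and the result
// can be widened again on a later iteration over the block. Illustrative
// chain (not from a specific test): strb wzr -> strh wzr -> str wzr ->
// str xzr, via STRBBui -> STRHHui -> STRWui -> STRXui.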
static unsigned getMatchingPairOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pairwise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STPSi;
  case AArch64::STRSpre:
    return AArch64::STPSpre;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STPDi;
  case AArch64::STRDpre:
    return AArch64::STPDpre;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STPQi;
  case AArch64::STRQpre:
    return AArch64::STPQpre;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STPWi;
  case AArch64::STRWpre:
    return AArch64::STPWpre;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STPXi;
  case AArch64::STRXpre:
    return AArch64::STPXpre;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDPSi;
  case AArch64::LDRSpre:
    return AArch64::LDPSpre;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDPDi;
  case AArch64::LDRDpre:
    return AArch64::LDPDpre;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDPQi;
  case AArch64::LDRQpre:
    return AArch64::LDPQpre;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDPWi;
  case AArch64::LDRWpre:
    return AArch64::LDPWpre;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDPXi;
  case AArch64::LDRXpre:
    return AArch64::LDPXpre;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return AArch64::LDPSWi;
  }
}
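// Illustrative pairing enabled by the mapping above (not taken from a
// specific test case):
//   ldr x0, [sp, #8]
//   ldr x1, [sp, #16]
// becomes, via getMatchingPairOpcode(LDRXui) == LDPXi:
//   ldp x0, x1, [sp, #8]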
static unsigned isMatchingStore(MachineInstr &LoadInst,
                                MachineInstr &StoreInst) {
  unsigned LdOpc = LoadInst.getOpcode();
  unsigned StOpc = StoreInst.getOpcode();
  switch (LdOpc) {
  default:
    llvm_unreachable("Unsupported load instruction!");
  case AArch64::LDRBBui:
    return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
           StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURBBi:
    return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
           StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRHHui:
    return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
           StOpc == AArch64::STRXui;
  case AArch64::LDURHHi:
    return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
           StOpc == AArch64::STURXi;
  case AArch64::LDRWui:
    return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
  case AArch64::LDURWi:
    return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
  case AArch64::LDRXui:
    return StOpc == AArch64::STRXui;
  case AArch64::LDURXi:
    return StOpc == AArch64::STURXi;
  }
}
static unsigned getPreIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pre-indexed equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpre;
  case AArch64::STRDui:
    return AArch64::STRDpre;
  case AArch64::STRQui:
    return AArch64::STRQpre;
  case AArch64::STRBBui:
    return AArch64::STRBBpre;
  case AArch64::STRHHui:
    return AArch64::STRHHpre;
  case AArch64::STRWui:
    return AArch64::STRWpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  case AArch64::LDRSui:
    return AArch64::LDRSpre;
  case AArch64::LDRDui:
    return AArch64::LDRDpre;
  case AArch64::LDRQui:
    return AArch64::LDRQpre;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpre;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpre;
  case AArch64::LDRWui:
    return AArch64::LDRWpre;
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpre;
  case AArch64::LDPSi:
    return AArch64::LDPSpre;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpre;
  case AArch64::LDPDi:
    return AArch64::LDPDpre;
  case AArch64::LDPQi:
    return AArch64::LDPQpre;
  case AArch64::LDPWi:
    return AArch64::LDPWpre;
  case AArch64::LDPXi:
    return AArch64::LDPXpre;
  case AArch64::STPSi:
    return AArch64::STPSpre;
  case AArch64::STPDi:
    return AArch64::STPDpre;
  case AArch64::STPQi:
    return AArch64::STPQpre;
  case AArch64::STPWi:
    return AArch64::STPWpre;
  case AArch64::STPXi:
    return AArch64::STPXpre;
  case AArch64::STGi:
    return AArch64::STGPreIndex;
  case AArch64::STZGi:
    return AArch64::STZGPreIndex;
  case AArch64::ST2Gi:
    return AArch64::ST2GPreIndex;
  case AArch64::STZ2Gi:
    return AArch64::STZ2GPreIndex;
  case AArch64::STGPi:
    return AArch64::STGPpre;
  }
}
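// Illustrative pre-index fold made possible by the mapping above:
//   add x0, x0, #16
//   str x1, [x0]
// becomes a single store with base-register writeback:
//   str x1, [x0, #16]!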
static unsigned getPostIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no post-indexed equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STRSpost;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STRDpost;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STRQpost;
  case AArch64::STRBBui:
    return AArch64::STRBBpost;
  case AArch64::STRHHui:
    return AArch64::STRHHpost;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STRWpost;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STRXpost;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDRSpost;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDRDpost;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDRQpost;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpost;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpost;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDRWpost;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDRXpost;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpost;
  case AArch64::LDPSi:
    return AArch64::LDPSpost;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpost;
  case AArch64::LDPDi:
    return AArch64::LDPDpost;
  case AArch64::LDPQi:
    return AArch64::LDPQpost;
  case AArch64::LDPWi:
    return AArch64::LDPWpost;
  case AArch64::LDPXi:
    return AArch64::LDPXpost;
  case AArch64::STPSi:
    return AArch64::STPSpost;
  case AArch64::STPDi:
    return AArch64::STPDpost;
  case AArch64::STPQi:
    return AArch64::STPQpost;
  case AArch64::STPWi:
    return AArch64::STPWpost;
  case AArch64::STPXi:
    return AArch64::STPXpost;
  case AArch64::STGi:
    return AArch64::STGPostIndex;
  case AArch64::STZGi:
    return AArch64::STZGPostIndex;
  case AArch64::ST2Gi:
    return AArch64::ST2GPostIndex;
  case AArch64::STZ2Gi:
    return AArch64::STZ2GPostIndex;
  case AArch64::STGPi:
    return AArch64::STGPpost;
  }
}
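// Illustrative post-index fold: the access happens at the old base value and
// the base is bumped afterwards, e.g.
//   ldr x1, [x0]
//   add x0, x0, #8
// becomes:
//   ldr x1, [x0], #8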
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {
  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  switch (OpcA) {
  default:
    return false;
  case AArch64::STRSpre:
    return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
  case AArch64::STRDpre:
    return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
  case AArch64::STRQpre:
    return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
  case AArch64::STRWpre:
    return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
  case AArch64::STRXpre:
    return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
  case AArch64::LDRSpre:
    return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
  case AArch64::LDRDpre:
    return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
  case AArch64::LDRQpre:
    return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
  case AArch64::LDRWpre:
    return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
  case AArch64::LDRXpre:
    return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
  }
}
// Returns the scale and offset range of pre/post indexed variants of MI.
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
                                       int &MinOffset, int &MaxOffset) {
  // ... (per-opcode scale and offset-range computation elided)
}
static MachineOperand &getLdStRegOp(MachineInstr &MI,
                                    unsigned PairedRegOp = 0) {
  assert(PairedRegOp < 2 && "Unexpected register operand idx.");
  bool IsPreLdSt = isPreLdSt(MI);
  if (IsPreLdSt)
    PairedRegOp += 1;
  unsigned Idx = isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp + 1 : 0;
  return MI.getOperand(Idx);
}
static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
                                  MachineInstr &StoreInst,
                                  const AArch64InstrInfo *TII) {
  assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
  int LoadSize = TII->getMemScale(LoadInst);
  int StoreSize = TII->getMemScale(StoreInst);
  // Unscaled offsets are already in bytes; scaled offsets are in units of the
  // access size, so normalize both to bytes before comparing.
  int UnscaledStOffset =
      TII->hasUnscaledLdStOffset(StoreInst)
          ? getLdStOffsetOp(StoreInst).getImm()
          : getLdStOffsetOp(StoreInst).getImm() * StoreSize;
  int UnscaledLdOffset =
      TII->hasUnscaledLdStOffset(LoadInst)
          ? getLdStOffsetOp(LoadInst).getImm()
          : getLdStOffsetOp(LoadInst).getImm() * LoadSize;
  return (UnscaledStOffset <= UnscaledLdOffset) &&
         (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
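// Worked example for the range check above (illustrative): for
//   str  x1, [x0, #8]   ; writes bytes [8, 16)
//   ldrh w2, [x0, #12]  ; reads bytes [12, 14)
// we get UnscaledStOffset = 8, UnscaledLdOffset = 12, StoreSize = 8 and
// LoadSize = 2, so 8 <= 12 and 12 + 2 <= 8 + 8: the load reads only bytes the
// store wrote and is a candidate for promotion.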
static bool isPromotableZeroStoreInst(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
          isNarrowStore(Opc)) &&
         getLdStRegOp(MI).getReg() == AArch64::WZR;
}
static bool isPromotableLoadFromStore(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRXui:
  // Unscaled instructions.
  case AArch64::LDURBBi:
  case AArch64::LDURHHi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
    return true;
  }
}
static bool isMergeableLdStUpdate(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::STRHHui:
  case AArch64::STRBBui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
  case AArch64::LDRHHui:
  case AArch64::LDRBBui:
  case AArch64::STGi:
  case AArch64::STZGi:
  case AArch64::ST2Gi:
  case AArch64::STZ2Gi:
  case AArch64::STGPi:
  // Unscaled instructions.
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  // Paired instructions.
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:
  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
  case AArch64::STPWi:
  case AArch64::STPXi:
    // Make sure this is a reg+imm (as opposed to an address reloc).
    if (!getLdStOffsetOp(MI).isImm())
      return false;

    return true;
  }
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                                           MachineBasicBlock::iterator MergeMI,
                                           const LdStPairFlags &Flags) {
  assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
         "Expected promotable zero stores.");

  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further, since merging invalidates both iterators.
  if (NextI == MergeMI)
    NextI = next_nodbg(NextI, E);

  unsigned Opc = I->getOpcode();
  unsigned MergeMIOpc = MergeMI->getOpcode();
  bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
  bool IsMergedMIScaled = !TII->hasUnscaledLdStOffset(MergeMIOpc);
  int OffsetStride = IsScaled ? TII->getMemScale(*I) : 1;
  int MergeMIOffsetStride = IsMergedMIScaled ? TII->getMemScale(*MergeMI) : 1;

  bool MergeForward = Flags.getMergeForward();
  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
  // Also based on MergeForward is from where we copy the base register operand
  // so we get the flags compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? getLdStBaseOp(*MergeMI) : getLdStBaseOp(*I);

  // The resulting wide store covers both inputs; use the lower byte offset.
  int64_t IOffsetInBytes = getLdStOffsetOp(*I).getImm() * OffsetStride;
  int64_t MIOffsetInBytes =
      getLdStOffsetOp(*MergeMI).getImm() * MergeMIOffsetStride;
  int64_t OffsetImm;
  if (IOffsetInBytes > MIOffsetInBytes)
    OffsetImm = MIOffsetInBytes;
  else
    OffsetImm = IOffsetInBytes;

  unsigned NewOpcode = getMatchingWideOpcode(Opc);
  bool FinalIsScaled = !TII->hasUnscaledLdStOffset(NewOpcode);

  // Adjust the final offset if the result opcode is a scaled store.
  if (FinalIsScaled) {
    // Pick the correct scaled offset immediate to use for the wide store.
    int NewOffsetStride = FinalIsScaled ? TII->getMemScale(NewOpcode) : 1;
    assert(((OffsetImm % NewOffsetStride) == 0) &&
           "Offset should be a multiple of the store memory scale");
    OffsetImm = OffsetImm / NewOffsetStride;
  }

  // Construct the new instruction.
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineInstrBuilder MIB =
      BuildMI(*MBB, InsertionPoint, DL, TII->get(NewOpcode))
          .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
          .add(BaseRegOp)
          .addImm(OffsetImm)
          .cloneMergedMemRefs({&*I, &*MergeMI})
          .setMIFlags(I->mergeFlagsWith(*MergeMI));
  (void)MIB;

  LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n    ");
  // ... (debug printing of the replaced and new instructions elided)

  // Erase the old instructions.
  I->eraseFromParent();
  MergeMI->eraseFromParent();
  return NextI;
}
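// Illustrative effect of mergeNarrowZeroStores:
//   str wzr, [x0]
//   str wzr, [x0, #4]
// becomes a single 64-bit zero store (note the switch to XZR):
//   str xzr, [x0]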
// Apply Fn to all instructions between MI and the beginning of the block,
// until a def for DefReg is seen; Fn receives each instruction and whether it
// defines DefReg. Returns false if Fn returned false or the limit was hit.
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
                              const TargetRegisterInfo *TRI, unsigned Limit,
                              std::function<bool(MachineInstr &, bool)> &Fn) {
  auto MBB = MI.getParent();
  for (MachineInstr &I :
       instructionsWithoutDebug(MI.getIterator(), MBB->instr_end())) {
    if (!Limit)
      return false;
    --Limit;

    bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
      return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
             TRI->regsOverlap(MOP.getReg(), DefReg);
    });
    if (!Fn(I, isDef))
      return false;
    if (isDef)
      break;
  }
  return true;
}

static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
                                   const TargetRegisterInfo *TRI) {
  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && MOP.isKill())
      Units.removeReg(MOP.getReg());

  for (const MachineOperand &MOP : phys_regs_and_masks(MI))
    if (MOP.isReg() && !MOP.isKill())
      Units.addReg(MOP.getReg());
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator Paired,
                                      const LdStPairFlags &Flags) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further, since merging invalidates both iterators.
  if (NextI == Paired)
    NextI = next_nodbg(NextI, E);

  int SExtIdx = Flags.getSExtIdx();
  unsigned Opc =
      SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
  bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
  int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;

  bool MergeForward = Flags.getMergeForward();

  std::optional<MCPhysReg> RenameReg = Flags.getRenameReg();
  if (MergeForward && RenameReg) {
    MCRegister RegToRename = getLdStRegOp(*I).getReg();
    DefinedInBB.addReg(*RenameReg);

    // Return the sub/super register for RenameReg, matching the size of
    // OriginalReg.
    auto GetMatchingSubReg = [this,
                              RenameReg](MCPhysReg OriginalReg) -> MCPhysReg {
      for (MCPhysReg SubOrSuper : TRI->sub_and_superregs_inclusive(*RenameReg))
        if (TRI->getMinimalPhysRegClass(OriginalReg) ==
            TRI->getMinimalPhysRegClass(SubOrSuper))
          return SubOrSuper;
      llvm_unreachable("Should have found matching sub or super register!");
    };

    std::function<bool(MachineInstr &, bool)> UpdateMIs =
        [this, RegToRename, GetMatchingSubReg](MachineInstr &MI, bool IsDef) {
          if (IsDef) {
            bool SeenDef = false;
            for (auto &MOP : MI.operands()) {
              // Rename the first explicit definition and all implicit
              // definitions matching RegToRename.
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  (!SeenDef || (MOP.isDef() && MOP.isImplicit())) &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                MOP.setReg(GetMatchingSubReg(MOP.getReg()));
                SeenDef = true;
              }
            }
          } else {
            for (auto &MOP : MI.operands()) {
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                MOP.setReg(GetMatchingSubReg(MOP.getReg()));
              }
            }
          }
          return true;
        };
    forAllMIsUntilDef(*I, RegToRename, TRI, LdStLimit, UpdateMIs);

#if !defined(NDEBUG)
    // Make sure the register used for renaming is not used between the paired
    // instructions; that would trash the renamed value.
    for (auto &MI : make_range(std::next(I), std::next(Paired)))
      assert(all_of(MI.operands(),
                    [this, RenameReg](const MachineOperand &MOP) {
                      return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
                             !TRI->regsOverlap(MOP.getReg(), *RenameReg);
                    }) &&
             "Rename register used between paired instruction, trashing the "
             "content");
#endif
  }

  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
  // Also based on MergeForward is from where we copy the base register operand
  // so we get the flags compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? getLdStBaseOp(*Paired) : getLdStBaseOp(*I);

  int Offset = getLdStOffsetOp(*I).getImm();
  int PairedOffset = getLdStOffsetOp(*Paired).getImm();
  bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
  if (IsUnscaled != PairedIsUnscaled) {
    // We're trying to pair instructions that differ in how they are scaled.
    // If FirstMI is scaled then scale the offset of MI accordingly.
    // Otherwise, do the opposite (i.e., make Paired's offset unscaled).
    int MemSize = TII->getMemScale(*Paired);
    if (PairedIsUnscaled) {
      // If the unscaled offset isn't a multiple of the MemSize, we can't
      // pair the operations together.
      assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
             "Offset should be a multiple of the stride!");
      PairedOffset /= MemSize;
    } else {
      PairedOffset *= MemSize;
    }
  }

  // Which register is Rt and which is Rt2 depends on the offset order.
  // However, for pre load/stores the Rt should be the one of the pre
  // load/store.
  MachineInstr *RtMI, *Rt2MI;
  if (Offset == PairedOffset + OffsetStride && !isPreLdSt(*I)) {
    RtMI = &*Paired;
    Rt2MI = &*I;
    // Here we swapped the assumption, the SExtIdx must be updated accordingly.
    if (SExtIdx != -1)
      SExtIdx = (SExtIdx + 1) % 2;
  } else {
    RtMI = &*I;
    Rt2MI = &*Paired;
  }
  int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
  // Scale the immediate offset, if necessary.
  if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
    assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
           "Unscaled offset cannot be scaled.");
    OffsetImm /= TII->getMemScale(*RtMI);
  }

  // Construct the new instruction.
  MachineInstrBuilder MIB;
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineOperand RegOp0 = getLdStRegOp(*RtMI);
  MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
  // Kill flags may become invalid when moving stores for pairing.
  if (RegOp0.isUse()) {
    if (!MergeForward) {
      // Clear kill flags on store if moving upwards.
      RegOp0.setIsKill(false);
      RegOp1.setIsKill(false);
    } else {
      // Clear kill flags of the first store's register between the two
      // instructions, since the register is now killed by the pair.
      Register Reg = getLdStRegOp(*I).getReg();
      for (MachineInstr &MI : make_range(std::next(I), Paired))
        MI.clearRegisterKills(Reg, TRI);
    }
  }
  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc)))
            .add(RegOp0)
            .add(RegOp1)
            .add(BaseRegOp)
            .addImm(OffsetImm)
            .cloneMergedMemRefs({&*I, &*Paired})
            .setMIFlags(I->mergeFlagsWith(*Paired));

  LLVM_DEBUG(
      dbgs() << "Creating pair load/store. Replacing instructions:\n    ");
  // ... (debug printing elided)

  if (SExtIdx != -1) {
    // The merged pair came from a sign-extending load: the LDP result at
    // SExtIdx only holds the value in its W sub-register, so rewrite that
    // operand to the W variant and materialize the sign extension explicitly.
    MachineOperand &DstMO = MIB->getOperand(SExtIdx);
    Register DstRegX = DstMO.getReg();
    // Get the W variant of that register.
    Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
    // Update the result of LDP to use the W instead of the X variant.
    DstMO.setReg(DstRegW);
    // Make the machine verifier happy by providing a definition for
    // the X register.
    BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
        .addReg(DstRegW)
        .addReg(DstRegX, RegState::Define);
    // Create the sign extension.
    BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
        .addReg(DstRegX)
        .addImm(0)
        .addImm(31);
  }

  if (MergeForward)
    for (const MachineOperand &MOP : phys_regs_and_masks(*I))
      if (MOP.isReg() && MOP.isKill())
        DefinedInBB.addReg(MOP.getReg());

  // Erase the old instructions.
  I->eraseFromParent();
  Paired->eraseFromParent();

  return NextI;
}
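// Illustrative effect of the SExtIdx handling above:
//   ldr   w1, [x2]
//   ldrsw x0, [x2, #4]
// becomes:
//   ldp   w1, w0, [x2]
//   sbfm  x0, x0, #0, #31   ; i.e. sxtw x0, w0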
MachineBasicBlock::iterator
AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                                          MachineBasicBlock::iterator StoreI) {
  MachineBasicBlock::iterator NextI =
      next_nodbg(LoadI, LoadI->getParent()->end());

  int LoadSize = TII->getMemScale(*LoadI);
  int StoreSize = TII->getMemScale(*StoreI);
  Register LdRt = getLdStRegOp(*LoadI).getReg();
  const MachineOperand &StMO = getLdStRegOp(*StoreI);
  Register StRt = getLdStRegOp(*StoreI).getReg();
  bool IsStoreXReg =
      TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);

  assert((IsStoreXReg ||
          TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
         "Unexpected RegClass");

  MachineInstr *BitExtMI;
  if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
    // Remove the load, if the destination register of the load is the same
    // register as the stored value.
    if (StRt == LdRt && LoadSize == 8) {
      for (MachineInstr &MI :
           make_range(StoreI->getIterator(), LoadI->getIterator())) {
        if (MI.killsRegister(StRt, TRI)) {
          MI.clearRegisterKills(StRt, TRI);
          break;
        }
      }
      // ... (debug printing elided)
      LoadI->eraseFromParent();
      return NextI;
    }
    // Replace the load with a mov to the destination register.
    BitExtMI =
        BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
            .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
            .add(StMO)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .setMIFlags(LoadI->getFlags());
  } else {
    // FIXME: Currently we disable this transformation in big-endian targets
    // as performance and correctness are verified only in little-endian.
    if (!Subtarget->isLittleEndian())
      return NextI;
    bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
    assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
           "Unsupported ld/st match");
    assert(LoadSize <= StoreSize && "Invalid load size");
    int UnscaledLdOffset =
        IsUnscaled ? getLdStOffsetOp(*LoadI).getImm()
                   : getLdStOffsetOp(*LoadI).getImm() * LoadSize;
    int UnscaledStOffset =
        IsUnscaled ? getLdStOffsetOp(*StoreI).getImm()
                   : getLdStOffsetOp(*StoreI).getImm() * StoreSize;
    int Width = LoadSize * 8;
    Register DestReg =
        IsStoreXReg ? Register(TRI->getMatchingSuperReg(
                          LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
                    : LdRt;

    assert((UnscaledLdOffset >= UnscaledStOffset &&
            (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
           "Invalid offset");

    int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
    int Imms = Immr + Width - 1;
    if (UnscaledLdOffset == UnscaledStOffset) {
      uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
                                | (Immr << 6)                 // immr
                                | (Imms << 0);                // imms

      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
                  DestReg)
              .add(StMO)
              .addImm(AndMaskEncoded)
              .setMIFlags(LoadI->getFlags());
    } else {
      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
                  DestReg)
              .add(StMO)
              .addImm(Immr)
              .addImm(Imms)
              .setMIFlags(LoadI->getFlags());
    }
  }

  // Clear kill flags between store and load.
  for (MachineInstr &MI :
       make_range(StoreI->getIterator(), BitExtMI->getIterator()))
    if (MI.killsRegister(StRt, TRI)) {
      MI.clearRegisterKills(StRt, TRI);
      break;
    }

  // ... (debug printing elided)
  LoadI->eraseFromParent();
  return NextI;
}
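// Illustrative store-to-load forwarding on the bitfield-extract path above
// (little-endian only):
//   str  w1, [x0, #4]
//   ldrh w2, [x0, #6]
// becomes (Immr = 8 * (6 - 4) = 16, Imms = 16 + 16 - 1 = 31):
//   ubfm w2, w1, #16, #31   ; i.e. lsr w2, w1, #16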
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  if (IsUnscaled) {
    // If the byte offset isn't a multiple of the stride, there's no point
    // trying to match it.
    if (Offset % OffsetStride)
      return false;
    Offset /= OffsetStride;
  }
  return Offset <= 63 && Offset >= -64;
}

// Do alignment, specialized to powers of 2 and signed ints, avoiding a
// C-style cast from uint64_t to int when using llvm::alignTo from
// include/llvm/Support/MathExtras.h.
static int alignTo(int Num, int PowOf2) {
  return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}
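// E.g. alignTo(5, 4) == (5 + 3) & ~3 == 8. The [-64, 63] range checked in
// inBoundsForPair is the signed 7-bit element offset of LDP/STP; for 8-byte
// accesses that corresponds to byte offsets in [-512, 504].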
bool AArch64LoadStoreOpt::findMatchingStore(
    MachineBasicBlock::iterator I, unsigned Limit,
    MachineBasicBlock::iterator &StoreI) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator MBBI = I;
  MachineInstr &LoadMI = *I;
  Register BaseReg = getLdStBaseOp(LoadMI).getReg();

  // If the load is the first instruction in the block, there's obviously
  // not any matching store.
  if (MBBI == B)
    return false;

  // Track which register units have been modified and used between the first
  // insn and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  unsigned Count = 0;
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // If the load reads directly from the address to which the store writes
    // and the stored value has not been modified, we can promote the load.
    if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
        BaseReg == getLdStBaseOp(MI).getReg() &&
        isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
        ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
      StoreI = MBBI;
      return true;
    }

    if (MI.isCall())
      return false;

    // Update modified / used register units.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (!ModifiedRegUnits.available(BaseReg))
      return false;

    // If we encounter a store aliased with the load, return early.
    if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA=*/false))
      return false;
  } while (MBBI != B && Count < Limit);
  return false;
}
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
                                       LdStPairFlags &Flags,
                                       const AArch64InstrInfo *TII) {
  // If this is volatile or if pairing is suppressed, not a candidate.
  if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
    return false;

  // We should have already checked FirstMI for pair suppression and
  // volatility.
  assert(!FirstMI.hasOrderedMemoryRef() &&
         !TII->isLdStPairSuppressed(FirstMI) &&
         "FirstMI shouldn't get here if either of these checks are true.");

  // ...

  unsigned OpcA = FirstMI.getOpcode();
  unsigned OpcB = MI.getOpcode();

  // Opcodes match: if each opcode is the same, nothing more to check.
  if (OpcA == OpcB)
    return !isPreLdSt(FirstMI);

  // Try to match a sign-extended load/store with a zero-extended load/store.
  bool IsValidLdStrOpc, PairIsValidLdStrOpc;
  unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
  assert(IsValidLdStrOpc &&
         "Given Opc should be a Load or Store with an immediate");
  // OpcA will be the first instruction in the pair.
  if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
    Flags.setSExtIdx(NonSExtOpc == (unsigned)OpcA ? 1 : 0);
    return true;
  }

  // If the second instruction isn't even a mergeable/pairable load/store,
  // bail out.
  if (!PairIsValidLdStrOpc)
    return false;

  // FIXME: We don't support merging narrow stores with mixed scaled/unscaled
  // offsets.
  if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
    return false;

  // The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and
  // LDR<S,D,Q,W,X,SW>pre - LDR<S,D,Q,W,X,SW>ui pairs are also candidates.
  if (isPreLdStPairCandidate(FirstMI, MI))
    return true;

  // Try to match an unscaled load/store with a scaled load/store.
  return TII->hasUnscaledLdStOffset(OpcA) !=
             TII->hasUnscaledLdStOffset(OpcB) &&
         getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB);

  // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
}
static bool canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
                             SmallPtrSetImpl<const TargetRegisterClass *>
                                 &RequiredClasses,
                             const TargetRegisterInfo *TRI) {
  if (!FirstMI.mayStore())
    return false;

  Register RegToRename = getLdStRegOp(FirstMI).getReg();
  // For now, we only rename if the store operand gets killed at the store.
  if (!getLdStRegOp(FirstMI).isKill() &&
      !any_of(FirstMI.operands(),
              [TRI, RegToRename](const MachineOperand &MOP) {
                return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                       MOP.isImplicit() && MOP.isKill() &&
                       TRI->regsOverlap(RegToRename, MOP.getReg());
              })) {
    LLVM_DEBUG(dbgs() << "  Operand not killed at " << FirstMI << "\n");
    return false;
  }

  auto canRenameMOP = [TRI](const MachineOperand &MOP) {
    if (MOP.isReg()) {
      auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
      // Renaming registers with multiple disjunct sub-registers (e.g. the
      // result of a LD3) means that all sub-registers are renamed,
      // potentially impacting other instructions we did not check. Bail out.
      if (RegClass->HasDisjunctSubRegs) {
        LLVM_DEBUG(
            dbgs()
            << "  Cannot rename operands with multiple disjunct subregisters ("
            << MOP << ")\n");
        return false;
      }
    }
    return MOP.isImplicit() || (MOP.isRenamable() && !MOP.isEarlyClobber());
  };

  bool FoundDef = false;

  // For each instruction between FirstMI and the previous def for
  // RegToRename, check that we can rename RegToRename and collect the
  // register classes it must be compatible with.
  std::function<bool(MachineInstr &, bool)> CheckMIs =
      [&](MachineInstr &MI, bool IsDef) {
        // Currently we do not try to rename across frame-setup instructions.
        if (MI.getFlag(MachineInstr::FrameSetup)) {
          LLVM_DEBUG(dbgs()
                     << "  Cannot rename framesetup instructions currently ("
                     << MI << ")\n");
          return false;
        }

        UsedInBetween.accumulate(MI);
        FoundDef = IsDef;

        if (FoundDef) {
          // For some pseudo instructions we might not generate code in the
          // end (e.g. KILL) and we would end up without a correct def for
          // the rename register.
          if (MI.isPseudo()) {
            LLVM_DEBUG(dbgs()
                       << "  Cannot rename pseudo instruction " << MI << "\n");
            return false;
          }

          for (auto &MOP : MI.operands()) {
            if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() ||
                !MOP.getReg() || !TRI->regsOverlap(MOP.getReg(), RegToRename))
              continue;
            if (!canRenameMOP(MOP)) {
              LLVM_DEBUG(dbgs()
                         << "  Cannot rename " << MOP << " in " << MI << "\n");
              return false;
            }
            RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
          }
          return true;
        } else {
          for (auto &MOP : MI.operands()) {
            if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
                !TRI->regsOverlap(MOP.getReg(), RegToRename))
              continue;
            if (!canRenameMOP(MOP)) {
              LLVM_DEBUG(dbgs()
                         << "  Cannot rename " << MOP << " in " << MI << "\n");
              return false;
            }
            RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
          }
        }
        return true;
      };

  if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
    return false;

  if (!FoundDef) {
    LLVM_DEBUG(dbgs() << "  Did not find definition for register in BB\n");
    return false;
  }
  return true;
}
static std::optional<MCPhysReg> tryToFindRegisterToRename(
    const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
    LiveRegUnits &UsedInBetween,
    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
    const TargetRegisterInfo *TRI) {
  const MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Check if a sub- or super-register of PR is callee saved.
  auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
    return any_of(TRI->sub_and_superregs_inclusive(PR),
                  [&MF, TRI](MCPhysReg SubOrSuper) {
                    return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
                  });
  };

  // Check if PR or one of its sub- or super-registers can be used for all
  // required register classes.
  auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
    return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
      return any_of(TRI->sub_and_superregs_inclusive(PR),
                    [C, TRI](MCPhysReg SubOrSuper) {
                      return C == TRI->getMinimalPhysRegClass(SubOrSuper);
                    });
    });
  };

  auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
  for (const MCPhysReg &PR : *RegClass) {
    if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
        !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
        CanBeUsedForAllClasses(PR)) {
      DefinedInBB.addReg(PR);
      LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
                        << "\n");
      return {PR};
    }
  }
  LLVM_DEBUG(dbgs() << "No rename register found from "
                    << TRI->getRegClassName(RegClass) << "\n");
  return std::nullopt;
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
                                      LdStPairFlags &Flags, unsigned Limit,
                                      bool FindNarrowMerge) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator MBBI = I;
  MachineBasicBlock::iterator MBBIWithRenameReg;
  MachineInstr &FirstMI = *I;
  MBBI = next_nodbg(MBBI, E);

  bool MayLoad = FirstMI.mayLoad();
  bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
  Register Reg = getLdStRegOp(FirstMI).getReg();
  Register BaseReg = getLdStBaseOp(FirstMI).getReg();
  int Offset = getLdStOffsetOp(FirstMI).getImm();
  int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
  bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);

  std::optional<bool> MaybeCanRename;
  if (!EnableRenaming)
    MaybeCanRename = {false};

  SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
  LiveRegUnits UsedInBetween;
  UsedInBetween.init(*TRI);

  Flags.clearRenameReg();

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  // Remember any instructions that read or write memory between FirstMI and
  // MI.
  SmallVector<MachineInstr *, 4> MemInsns;

  for (unsigned Count = 0; MBBI != E && Count < Limit;
       MBBI = next_nodbg(MBBI, E)) {
    MachineInstr &MI = *MBBI;

    UsedInBetween.accumulate(MI);

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    Flags.setSExtIdx(-1);
    if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
        getLdStOffsetOp(MI).isImm()) {
      assert(MI.mayLoadOrStore() && "Expected memory operation.");
      Register MIBaseReg = getLdStBaseOp(MI).getReg();
      int MIOffset = getLdStOffsetOp(MI).getImm();
      bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
      if (IsUnscaled != MIIsUnscaled) {
        // We're trying to pair instructions that differ in how they are
        // scaled. If FirstMI is scaled then scale the offset of MI
        // accordingly. Otherwise, do the opposite (i.e., make MI's offset
        // unscaled).
        int MemSize = TII->getMemScale(MI);
        if (MIIsUnscaled) {
          // If the unscaled offset isn't a multiple of the MemSize, we can't
          // pair the operations together: bail and keep looking.
          if (MIOffset % MemSize) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
          MIOffset /= MemSize;
        } else {
          MIOffset *= MemSize;
        }
      }

      bool IsPreLdSt = isPreLdStPairCandidate(FirstMI, MI);

      if (BaseReg == MIBaseReg) {
        if (IsPreLdSt) {
          // If the offset of the second ld/st is not equal to the size of
          // the destination operand of the first ld/st, these are not
          // candidates for a pre-index operation with writeback.
          bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
          bool IsBaseRegUsed = !UsedRegUnits.available(
              getLdStBaseOp(MI).getReg());
          bool IsBaseRegModified = !ModifiedRegUnits.available(
              getLdStBaseOp(MI).getReg());
          // If the stored value and the address of the second instruction is
          // the same, it needs to be using the updated register and therefore
          // it must not be folded.
          bool IsMIRegTheSame =
              TRI->regsOverlap(getLdStRegOp(MI).getReg(), MIBaseReg);
          if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
              IsMIRegTheSame) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        } else if ((Offset != MIOffset + OffsetStride) &&
                   (Offset + OffsetStride != MIOffset)) {
          LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                            TRI);
          MemInsns.push_back(&MI);
          continue;
        }

        int MinOffset = Offset < MIOffset ? Offset : MIOffset;
        if (FindNarrowMerge) {
          // If the alignment requirements of the scaled wide load/store
          // instruction can't express the offset of the scaled narrow input,
          // bail and keep looking. For promotable zero stores, allow only
          // when the stored value is the same (i.e., WZR).
          if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
              (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        } else {
          // Pairwise instructions have a 7-bit signed offset field. If the
          // resultant immediate offset of merging these instructions is out
          // of range for a pairwise instruction, bail and keep looking.
          if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
          // If the alignment requirements of the paired (scaled) instruction
          // can't express the offset of the unscaled input, bail and keep
          // looking.
          if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
            LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
                                              UsedRegUnits, TRI);
            MemInsns.push_back(&MI);
            continue;
          }
        }

        // ...

        // If the BaseReg has been modified, then we cannot do the
        // optimization. For example, in the following pattern
        //   ldr x1 [x2]
        //   ldr x2 [x3]
        //   ldr x4 [x2, #8],
        // the first and third ldr cannot be converted to ldp x1, x4, [x2].
        if (!ModifiedRegUnits.available(BaseReg))
          return E;

        // If the Rt of the second instruction was not modified or used
        // between the two instructions and none of the instructions between
        // the second and first alias with the second, we can combine the
        // second into the first.
        if (ModifiedRegUnits.available(getLdStRegOp(MI).getReg()) &&
            !(MI.mayLoad() &&
              !UsedRegUnits.available(getLdStRegOp(MI).getReg())) &&
            !mayAlias(MI, MemInsns, AA)) {
          Flags.setMergeForward(false);
          Flags.clearRenameReg();
          return MBBI;
        }

        // Likewise, if the Rt of the first instruction is not modified or
        // used between the two instructions and none of the instructions
        // between the first and the second alias with the first, we can
        // combine the first into the second.
        if (!(MayLoad &&
              !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) &&
            !mayAlias(FirstMI, MemInsns, AA)) {
          if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
            Flags.setMergeForward(true);
            Flags.clearRenameReg();
            return MBBI;
          }

          if (DebugCounter::shouldExecute(RegRenamingCounter)) {
            if (!MaybeCanRename)
              MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween,
                                                 RequiredClasses, TRI)};

            if (*MaybeCanRename) {
              std::optional<MCPhysReg> MaybeRenameReg =
                  tryToFindRegisterToRename(*FirstMI.getParent()->getParent(),
                                            Reg, DefinedInBB, UsedInBetween,
                                            RequiredClasses, TRI);
              if (MaybeRenameReg) {
                Flags.setRenameReg(*MaybeRenameReg);
                Flags.setMergeForward(true);
                MBBIWithRenameReg = MBBI;
              }
            }
          }
        }
        // Unable to combine these instructions due to interference in
        // between. Keep looking.
      }
    }

    if (Flags.getRenameReg())
      return MBBIWithRenameReg;

    // If the instruction wasn't a matching load or store, stop searching if
    // we encounter a call instruction that might modify memory.
    if (MI.isCall())
      return E;

    // Update modified / used register units.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (!ModifiedRegUnits.available(BaseReg))
      return E;

    // Update list of instructions that read/write memory.
    if (MI.mayLoadOrStore())
      MemInsns.push_back(&MI);
  }
  return E;
}
// If MI is an SP update followed by a CFA-offset-related CFI instruction,
// return the CFI instruction so it can be moved past the merged update;
// otherwise return the end iterator.
static MachineBasicBlock::iterator
maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {
  auto End = MI.getParent()->end();
  if (MaybeCFI == End ||
      MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
      !(MI.getFlag(MachineInstr::FrameSetup) ||
        MI.getFlag(MachineInstr::FrameDestroy)) ||
      MI.getOperand(0).getReg() != AArch64::SP)
    return End;

  const MachineFunction &MF = *MI.getParent()->getParent();
  unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
  const MCCFIInstruction &CFI = MF.getFrameInstructions()[CFIIndex];
  switch (CFI.getOperation()) {
  case MCCFIInstruction::OpDefCfa:
  case MCCFIInstruction::OpDefCfaOffset:
    return MaybeCFI;
  default:
    return End;
  }
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
                                     MachineBasicBlock::iterator Update,
                                     bool IsPreIdx) {
  assert((Update->getOpcode() == AArch64::ADDXri ||
          Update->getOpcode() == AArch64::SUBXri) &&
         "Unexpected base register update instruction to merge!");
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
  // Return the instruction following the merged instruction, which is
  // the instruction following our unmerged load. Unless that's the add/sub
  // instruction we're merging, in which case it's the one after that.
  if (NextI == Update)
    NextI = next_nodbg(NextI, E);

  int Value = Update->getOperand(2).getImm();
  assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
         "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
  if (Update->getOpcode() == AArch64::SUBXri)
    Value = -Value;

  unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
                             : getPostIndexedOpcode(I->getOpcode());
  MachineInstrBuilder MIB;
  int Scale, MinOffset, MaxOffset;
  getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
  if (!isPairedLdSt(*I)) {
    // Non-paired instruction.
    MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
              .add(getLdStRegOp(*Update))
              .add(getLdStRegOp(*I))
              .add(getLdStBaseOp(*I))
              .addImm(Value / Scale)
              .setMemRefs(I->memoperands())
              .setMIFlags(I->mergeFlagsWith(*Update));
  } else {
    // Paired instruction.
    MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
              .add(getLdStRegOp(*Update))
              .add(getLdStRegOp(*I, 0))
              .add(getLdStRegOp(*I, 1))
              .add(getLdStBaseOp(*I))
              .addImm(Value / Scale)
              .setMemRefs(I->memoperands())
              .setMIFlags(I->mergeFlagsWith(*Update));
  }
  (void)MIB;
  // ... (CFI splicing, statistics update and debug printing elided)

  // Erase the old instructions for the block.
  I->eraseFromParent();
  Update->eraseFromParent();

  return NextI;
}
bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
                                               MachineInstr &MI,
                                               unsigned BaseReg, int Offset) {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBXri:
  case AArch64::ADDXri:
    // Make sure it's a vanilla immediate operand, not a relocation or
    // anything else we can't handle.
    if (!MI.getOperand(2).isImm())
      break;
    // Watch out for 1 << 12 shifted value.
    if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
      break;

    // The update instruction source and destination register must be the
    // same as the load/store base register.
    if (MI.getOperand(0).getReg() != BaseReg ||
        MI.getOperand(1).getReg() != BaseReg)
      break;

    int UpdateOffset = MI.getOperand(2).getImm();
    if (MI.getOpcode() == AArch64::SUBXri)
      UpdateOffset = -UpdateOffset;

    // The immediate must be a multiple of the scaling factor of the pre/post
    // indexed instruction.
    int Scale, MinOffset, MaxOffset;
    getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
    if (UpdateOffset % Scale != 0)
      break;

    // The scaled offset must fit in the instruction immediate.
    int ScaledOffset = UpdateOffset / Scale;
    if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
      break;

    // If we have a non-zero Offset, check that it matches the amount we're
    // adding to the register.
    if (!Offset || Offset == UpdateOffset)
      return true;
    break;
  }
  return false;
}
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
    MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr &MemMI = *I;
  MachineBasicBlock::iterator MBBI = I;

  Register BaseReg = getLdStBaseOp(MemMI).getReg();
  int MIUnscaledOffset =
      getLdStOffsetOp(MemMI).getImm() * TII->getMemScale(MemMI);

  // Scan forward looking for post-index opportunities. Updating instructions
  // can't be formed if the memory instruction doesn't have the offset we're
  // looking for.
  if (MIUnscaledOffset != UnscaledOffset)
    return E;

  // If the base register overlaps a source/destination register, we can't
  // merge the update. This does not apply to tag store instructions, which
  // ignore the address part of the source register.
  if (!isTagStore(MemMI)) {
    bool IsPairedInsn = isPairedLdSt(MemMI);
    for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
      Register DestReg = getLdStRegOp(MemMI, i).getReg();
      if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
        return E;
    }
  }

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  MBBI = next_nodbg(MBBI, E);

  // We can't post-increment the stack pointer if any instruction between
  // the memory access (I) and the increment (MBBI) can access the memory
  // region defined by [SP, MBBI].
  const bool BaseRegSP = BaseReg == AArch64::SP;
  // ...

  for (unsigned Count = 0; MBBI != E && Count < Limit;
       MBBI = next_nodbg(MBBI, E)) {
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // If we found a match, return it.
    if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
      return MBBI;

    // Update the status of what the instruction clobbered and used.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is used or modified, we have no match,
    // so return early. If we are optimizing SP, do not allow instructions
    // that may load or store in between the load and the optimized value
    // update.
    if (!ModifiedRegUnits.available(BaseReg) ||
        !UsedRegUnits.available(BaseReg) ||
        (BaseRegSP && MBBI->mayLoadOrStore()))
      return E;
  }
  return E;
}
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
    MachineBasicBlock::iterator I, unsigned Limit) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr &MemMI = *I;
  MachineBasicBlock::iterator MBBI = I;
  MachineFunction &MF = *MemMI.getMF();

  Register BaseReg = getLdStBaseOp(MemMI).getReg();
  int Offset = getLdStOffsetOp(MemMI).getImm();

  // If the load/store is the first instruction in the block, there's
  // obviously not any matching update. Ditto if the memory offset isn't zero.
  if (MBBI == B || Offset != 0)
    return E;
  // If the base register overlaps a destination register, we can't merge the
  // update.
  if (!isTagStore(MemMI)) {
    bool IsPairedInsn = isPairedLdSt(MemMI);
    for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
      Register DestReg = getLdStRegOp(MemMI, i).getReg();
      if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
        return E;
    }
  }

  const bool BaseRegSP = BaseReg == AArch64::SP;
  // ...
  unsigned RedZoneSize =
      Subtarget->getTargetLowering()->getRedZoneSize(MF.getFunction());

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  unsigned Count = 0;
  bool MemAccessBeforeSPPreInc = false;
  do {
    MBBI = prev_nodbg(MBBI, B);
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // If we found a match, return it.
    if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
      // Check that the update value is within our red zone limit (which may
      // be zero).
      if (MemAccessBeforeSPPreInc &&
          MBBI->getOperand(2).getImm() > RedZoneSize)
        return E;
      return MBBI;
    }

    // Update the status of what the instruction clobbered and used.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is used or modified, we have no match,
    // so return early.
    if (!ModifiedRegUnits.available(BaseReg) ||
        !UsedRegUnits.available(BaseReg))
      return E;
    // Keep track if we have a memory access before an SP pre-increment; in
    // that case we need to validate later that the update amount respects
    // the red zone.
    if (BaseRegSP && MBBI->mayLoadOrStore())
      MemAccessBeforeSPPreInc = true;
  } while (MBBI != B && Count < Limit);
  return E;
}
bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
    MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  // If this is a volatile load, don't mess with it.
  if (MI.hasOrderedMemoryRef())
    return false;

  // Make sure this is a reg+imm.
  // FIXME: It is possible to extend it to handle reg+reg cases.
  if (!getLdStOffsetOp(MI).isImm())
    return false;

  // Look backward up to LdStLimit instructions.
  MachineBasicBlock::iterator StoreI;
  if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
    ++NumLoadsFromStoresPromoted;
    // Promote the load. Keeping the iterator straight is a pain, so we let
    // the merge routine tell us what the next instruction is after it's done
    // mucking about.
    MBBI = promoteLoadFromStore(MBBI, StoreI);
    return true;
  }
  return false;
}
// Merge adjacent zero stores into a wider store.
bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
    MachineBasicBlock::iterator &MBBI) {
  assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();

  if (!TII->isCandidateToMergeOrPair(MI))
    return false;

  // Look ahead up to LdStLimit instructions for a mergeable instruction.
  LdStPairFlags Flags;
  MachineBasicBlock::iterator MergeMI =
      findMatchingInsn(MBBI, Flags, LdStLimit, /*FindNarrowMerge=*/true);
  if (MergeMI != E) {
    ++NumZeroStoresPromoted;

    // Keeping the iterator straight is a pain, so we let the merge routine
    // tell us what the next instruction is after it's done mucking about.
    MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
    return true;
  }
  return false;
}
// Find loads and stores that can be merged into a single load or store pair
// instruction.
bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();

  if (!TII->isCandidateToMergeOrPair(MI))
    return false;

  // Early exit if the offset is not possible to match. (6 bits of positive
  // range, plus allow an extra one in case we find a later insn that matches
  // with Offset-1.)
  bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
  int Offset = getLdStOffsetOp(MI).getImm();
  int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
  // Allow one more for offset.
  if (Offset > 0)
    Offset -= OffsetStride;
  if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
    return false;

  // Look ahead up to LdStLimit instructions for a pairable instruction.
  LdStPairFlags Flags;
  MachineBasicBlock::iterator Paired =
      findMatchingInsn(MBBI, Flags, LdStLimit, /*FindNarrowMerge=*/false);
  if (Paired != E) {
    ++NumPairCreated;
    if (TII->hasUnscaledLdStOffset(MI))
      ++NumUnscaledPairCreated;
    // Keeping the iterator straight is a pain, so we let the merge routine
    // tell us what the next instruction is after it's done mucking about.
    auto Prev = std::prev(MBBI);
    MBBI = mergePairedInsns(MBBI, Paired, Flags);
    // Collect liveness info for instructions between Prev and the new
    // position MBBI.
    for (auto I = std::next(Prev); I != MBBI; I++)
      updateDefinedRegisters(*I, DefinedInBB, TRI);

    return true;
  }
  return false;
}
bool AArch64LoadStoreOpt::tryToMergeLdStUpdate(
    MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock::iterator E = MI.getParent()->end();
  MachineBasicBlock::iterator Update;

  // Look forward to try to form a post-index instruction. For example,
  //   ldr x0, [x20]
  //   add x20, x20, #32
  // merged into:
  //   ldr x0, [x20], #32
  Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
  if (Update != E) {
    // Merge the update into the ld/st.
    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
    return true;
  }

  // Don't know how to handle unscaled pre/post-index versions below, so bail.
  if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
    return false;

  // Look back to try to find a pre-index instruction. For example,
  //   add x0, x0, #8
  //   ldr x1, [x0]
  // merged into:
  //   ldr x1, [x0, #8]!
  Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
  if (Update != E) {
    // Merge the update into the ld/st.
    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
    return true;
  }

  // The immediate in the load/store is scaled by the size of the memory
  // operation. The immediate in the add we're looking for, however, is not,
  // so adjust here.
  int UnscaledOffset = getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);

  // Look forward to try to find a pre-index instruction. For example,
  //   ldr x1, [x0, #64]
  //   add x0, x0, #64
  // merged into:
  //   ldr x1, [x0, #64]!
  Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
  if (Update != E) {
    // Merge the update into the ld/st.
    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
    return true;
  }

  return false;
}
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
                                        bool EnableNarrowZeroStOpt) {
  bool Modified = false;

  // Four transformations to try:
  // 1) Find loads that directly read from stores and promote them by
  //    replacing with mov instructions. If the store is wider than the load,
  //    the load is replaced with a bitfield extract.
  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
      Modified = true;
    else
      ++MBBI;
  }

  // 2) Merge adjacent zero stores into a wider store.
  if (EnableNarrowZeroStOpt)
    for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
         MBBI != E;) {
      if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
        Modified = true;
      else
        ++MBBI;
    }

  // 3) Find loads and stores that can be merged into a single load or store
  //    pair instruction.
  if (MBB.getParent()->getRegInfo().tracksLiveness()) {
    DefinedInBB.clear();
    DefinedInBB.addLiveIns(MBB);
  }
  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    // Track currently live registers up to this point, to help with
    // searching for a rename register on demand.
    updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
    if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
      Modified = true;
    else
      ++MBBI;
  }

  // 4) Find base register updates that can be merged into the load or store
  //    as a base register writeback.
  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    if (isMergeableLdStUpdate(*MBBI) && tryToMergeLdStUpdate(MBBI))
      Modified = true;
    else
      ++MBBI;
  }

  return Modified;
}
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(Fn.getFunction()))
    return false;

  Subtarget = &Fn.getSubtarget<AArch64Subtarget>();
  TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
  TRI = Subtarget->getRegisterInfo();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  // Resize the modified and used register unit trackers. We do this once
  // per function and then clear the register units each time we optimize a
  // load or store.
  ModifiedRegUnits.init(*TRI);
  UsedRegUnits.init(*TRI);
  DefinedInBB.init(*TRI);

  bool Modified = false;
  bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
  for (auto &MBB : Fn) {
    auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
    Modified |= M;
  }

  return Modified;
}
FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
  return new AArch64LoadStoreOpt();
}