LLVM 23.0.0git
AArch64LoadStoreOptimizer.cpp
Go to the documentation of this file.
1//===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass that performs load / store related peephole
10// optimizations. This pass should be run after register allocation.
11//
12// The pass runs after the PrologEpilogInserter where we emit the CFI
13// instructions. In order to preserve the correctness of the unwind information,
14// the pass should not change the order of any two instructions, one of which
15// has the FrameSetup/FrameDestroy flag or, alternatively, apply an add-hoc fix
16// to unwind information.
17//
18//===----------------------------------------------------------------------===//
19
20#include "AArch64InstrInfo.h"
22#include "AArch64Subtarget.h"
24#include "llvm/ADT/SetVector.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/ADT/StringRef.h"
38#include "llvm/IR/DebugLoc.h"
39#include "llvm/MC/MCAsmInfo.h"
40#include "llvm/MC/MCDwarf.h"
41#include "llvm/Pass.h"
43#include "llvm/Support/Debug.h"
46#include <cassert>
47#include <cstdint>
48#include <functional>
49#include <iterator>
50#include <limits>
51#include <optional>
52
53using namespace llvm;
54
55#define DEBUG_TYPE "aarch64-ldst-opt"
56
57STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
58STATISTIC(NumPostFolded, "Number of post-index updates folded");
59STATISTIC(NumPreFolded, "Number of pre-index updates folded");
60STATISTIC(NumUnscaledPairCreated,
61 "Number of load/store from unscaled generated");
62STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
63STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
64STATISTIC(NumFailedAlignmentCheck, "Number of load/store pair transformation "
65 "not passed the alignment check");
66STATISTIC(NumConstOffsetFolded,
67 "Number of const offset of index address folded");
68STATISTIC(NumUMOVFoldedToFPRStore,
69 "Number of UMOV + GPR stores folded to FPR stores");
70
71DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
72 "Controls which pairs are considered for renaming");
73
74// The LdStLimit limits how far we search for load/store pairs.
75static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
76 cl::init(20), cl::Hidden);
77
78// The UpdateLimit limits how far we search for update instructions when we form
79// pre-/post-index instructions.
80static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
82
83// The LdStConstLimit limits how far we search for const offset instructions
84// when we form index address load/store instructions.
85static cl::opt<unsigned> LdStConstLimit("aarch64-load-store-const-scan-limit",
86 cl::init(10), cl::Hidden);
87
88// The UMOVFoldLimit limits how far back we scan from a GPR store to find a
89// UMOV that can be folded into a direct FPR store.
90static cl::opt<unsigned> UMOVFoldLimit("aarch64-umov-fold-scan-limit",
91 cl::init(16), cl::Hidden);
92
93// Enable register renaming to find additional store pairing opportunities.
94static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
95 cl::init(true), cl::Hidden);
96
97#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
98
99namespace {
100
101using LdStPairFlags = struct LdStPairFlags {
102 // If a matching instruction is found, MergeForward is set to true if the
103 // merge is to remove the first instruction and replace the second with
104 // a pair-wise insn, and false if the reverse is true.
105 bool MergeForward = false;
106
107 // SExtIdx gives the index of the result of the load pair that must be
108 // extended. The value of SExtIdx assumes that the paired load produces the
109 // value in this order: (I, returned iterator), i.e., -1 means no value has
110 // to be extended, 0 means I, and 1 means the returned iterator.
111 int SExtIdx = -1;
112
113 // If not none, RenameReg can be used to rename the result register of the
114 // first store in a pair. Currently this only works when merging stores
115 // forward.
116 std::optional<MCPhysReg> RenameReg;
117
118 LdStPairFlags() = default;
119
120 void setMergeForward(bool V = true) { MergeForward = V; }
121 bool getMergeForward() const { return MergeForward; }
122
123 void setSExtIdx(int V) { SExtIdx = V; }
124 int getSExtIdx() const { return SExtIdx; }
125
126 void setRenameReg(MCPhysReg R) { RenameReg = R; }
127 void clearRenameReg() { RenameReg = std::nullopt; }
128 std::optional<MCPhysReg> getRenameReg() const { return RenameReg; }
129};
130
131struct AArch64LoadStoreOpt {
133 const AArch64InstrInfo *TII;
134 const TargetRegisterInfo *TRI;
135 const AArch64Subtarget *Subtarget;
136
137 // Track which register units have been modified and used.
138 LiveRegUnits ModifiedRegUnits, UsedRegUnits;
139 LiveRegUnits DefinedInBB;
140
141 // Scan the instructions looking for a load/store that can be combined
142 // with the current instruction into a load/store pair.
143 // Return the matching instruction if one is found, else MBB->end().
145 LdStPairFlags &Flags,
146 unsigned Limit,
147 bool FindNarrowMerge);
148
149 // Scan the instructions looking for a store that writes to the address from
150 // which the current load instruction reads. Return true if one is found.
151 bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
153
154 // Merge the two instructions indicated into a wider narrow store instruction.
156 mergeNarrowZeroStores(MachineBasicBlock::iterator I,
158 const LdStPairFlags &Flags);
159
160 // Merge the two instructions indicated into a single pair-wise instruction.
162 mergePairedInsns(MachineBasicBlock::iterator I,
164 const LdStPairFlags &Flags);
165
166 // Promote the load that reads directly from the address stored to.
168 promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
170
171 // Scan the instruction list to find a base register update that can
172 // be combined with the current instruction (a load or store) using
173 // pre or post indexed addressing with writeback. Scan forwards.
175 findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
176 int UnscaledOffset, unsigned Limit);
177
178 // Scan the instruction list to find a register assigned with a const
179 // value that can be combined with the current instruction (a load or store)
180 // using base addressing with writeback. Scan backwards.
182 findMatchingConstOffsetBackward(MachineBasicBlock::iterator I, unsigned Limit,
183 unsigned &Offset);
184
185 // Scan the instruction list to find a base register update that can
186 // be combined with the current instruction (a load or store) using
187 // pre or post indexed addressing with writeback. Scan backwards.
188 // `MergeEither` is set to true if the combined instruction may be placed
189 // either at the location of the load/store instruction or at the location of
190 // the update instruction.
192 findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit,
193 bool &MergeEither);
194
195 // Find an instruction that updates the base register of the ld/st
196 // instruction.
197 bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
198 unsigned BaseReg, int Offset);
199
200 bool isMatchingMovConstInsn(MachineInstr &MemMI, MachineInstr &MI,
201 unsigned IndexReg, unsigned &Offset);
202
203 // Merge a pre- or post-index base register update into a ld/st instruction.
204 std::optional<MachineBasicBlock::iterator>
205 mergeUpdateInsn(MachineBasicBlock::iterator I,
206 MachineBasicBlock::iterator Update, bool IsForward,
207 bool IsPreIdx, bool MergeEither);
208
210 mergeConstOffsetInsn(MachineBasicBlock::iterator I,
211 MachineBasicBlock::iterator Update, unsigned Offset,
212 int Scale);
213
214 // Find and merge zero store instructions.
215 bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);
216
217 // Find and pair ldr/str instructions.
218 bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
219
220 // Find and promote load instructions which read directly from store.
221 bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
222
223 // Find and merge a base register updates before or after a ld/st instruction.
224 bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);
225
226 // Find and merge an index ldr/st instruction into a base ld/st instruction.
227 bool tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, int Scale);
228
229 // Replace a UMOV (lane 0) + GPR store with a direct FPR sub-register store.
230 bool tryToReplaceUMOVStore(MachineBasicBlock::iterator &MBBI);
231
232 bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
233
234 bool runOnMachineFunction(MachineFunction &MF);
235};
236
237struct AArch64LoadStoreOptLegacy : public MachineFunctionPass {
238 static char ID;
239
240 AArch64LoadStoreOptLegacy() : MachineFunctionPass(ID) {}
241
242 bool runOnMachineFunction(MachineFunction &Fn) override;
243
244 void getAnalysisUsage(AnalysisUsage &AU) const override {
247 }
248
249 MachineFunctionProperties getRequiredProperties() const override {
250 return MachineFunctionProperties().setNoVRegs();
251 }
252
253 StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
254};
255
256char AArch64LoadStoreOptLegacy::ID = 0;
257
258} // end anonymous namespace
259
260INITIALIZE_PASS(AArch64LoadStoreOptLegacy, "aarch64-ldst-opt",
261 AARCH64_LOAD_STORE_OPT_NAME, false, false)
262
263static bool isNarrowStore(unsigned Opc) {
264 switch (Opc) {
265 default:
266 return false;
267 case AArch64::STRBBui:
268 case AArch64::STURBBi:
269 case AArch64::STRHHui:
270 case AArch64::STURHHi:
271 return true;
272 }
273}
274
275// These instruction set memory tag and either keep memory contents unchanged or
276// set it to zero, ignoring the address part of the source register.
277static bool isTagStore(const MachineInstr &MI) {
278 switch (MI.getOpcode()) {
279 default:
280 return false;
281 case AArch64::STGi:
282 case AArch64::STZGi:
283 case AArch64::ST2Gi:
284 case AArch64::STZ2Gi:
285 return true;
286 }
287}
288
289static unsigned getMatchingNonSExtOpcode(unsigned Opc,
290 bool *IsValidLdStrOpc = nullptr) {
291 if (IsValidLdStrOpc)
292 *IsValidLdStrOpc = true;
293 switch (Opc) {
294 default:
295 if (IsValidLdStrOpc)
296 *IsValidLdStrOpc = false;
297 return std::numeric_limits<unsigned>::max();
298 case AArch64::STRDui:
299 case AArch64::STURDi:
300 case AArch64::STRDpre:
301 case AArch64::STRQui:
302 case AArch64::STURQi:
303 case AArch64::STRQpre:
304 case AArch64::STRBBui:
305 case AArch64::STURBBi:
306 case AArch64::STRHHui:
307 case AArch64::STURHHi:
308 case AArch64::STRWui:
309 case AArch64::STRWpre:
310 case AArch64::STURWi:
311 case AArch64::STRXui:
312 case AArch64::STRXpre:
313 case AArch64::STURXi:
314 case AArch64::STR_ZXI:
315 case AArch64::LDRDui:
316 case AArch64::LDURDi:
317 case AArch64::LDRDpre:
318 case AArch64::LDRQui:
319 case AArch64::LDURQi:
320 case AArch64::LDRQpre:
321 case AArch64::LDRWui:
322 case AArch64::LDURWi:
323 case AArch64::LDRWpre:
324 case AArch64::LDRXui:
325 case AArch64::LDURXi:
326 case AArch64::LDRXpre:
327 case AArch64::STRSui:
328 case AArch64::STURSi:
329 case AArch64::STRSpre:
330 case AArch64::LDRSui:
331 case AArch64::LDURSi:
332 case AArch64::LDRSpre:
333 case AArch64::LDR_ZXI:
334 return Opc;
335 case AArch64::LDRSWui:
336 return AArch64::LDRWui;
337 case AArch64::LDURSWi:
338 return AArch64::LDURWi;
339 case AArch64::LDRSWpre:
340 return AArch64::LDRWpre;
341 }
342}
343
344static unsigned getMatchingWideOpcode(unsigned Opc) {
345 switch (Opc) {
346 default:
347 llvm_unreachable("Opcode has no wide equivalent!");
348 case AArch64::STRBBui:
349 return AArch64::STRHHui;
350 case AArch64::STRHHui:
351 return AArch64::STRWui;
352 case AArch64::STURBBi:
353 return AArch64::STURHHi;
354 case AArch64::STURHHi:
355 return AArch64::STURWi;
356 case AArch64::STURWi:
357 return AArch64::STURXi;
358 case AArch64::STRWui:
359 return AArch64::STRXui;
360 }
361}
362
363static unsigned getMatchingPairOpcode(unsigned Opc) {
364 switch (Opc) {
365 default:
366 llvm_unreachable("Opcode has no pairwise equivalent!");
367 case AArch64::STRSui:
368 case AArch64::STURSi:
369 return AArch64::STPSi;
370 case AArch64::STRSpre:
371 return AArch64::STPSpre;
372 case AArch64::STRDui:
373 case AArch64::STURDi:
374 return AArch64::STPDi;
375 case AArch64::STRDpre:
376 return AArch64::STPDpre;
377 case AArch64::STRQui:
378 case AArch64::STURQi:
379 case AArch64::STR_ZXI:
380 return AArch64::STPQi;
381 case AArch64::STRQpre:
382 return AArch64::STPQpre;
383 case AArch64::STRWui:
384 case AArch64::STURWi:
385 return AArch64::STPWi;
386 case AArch64::STRWpre:
387 return AArch64::STPWpre;
388 case AArch64::STRXui:
389 case AArch64::STURXi:
390 return AArch64::STPXi;
391 case AArch64::STRXpre:
392 return AArch64::STPXpre;
393 case AArch64::LDRSui:
394 case AArch64::LDURSi:
395 return AArch64::LDPSi;
396 case AArch64::LDRSpre:
397 return AArch64::LDPSpre;
398 case AArch64::LDRDui:
399 case AArch64::LDURDi:
400 return AArch64::LDPDi;
401 case AArch64::LDRDpre:
402 return AArch64::LDPDpre;
403 case AArch64::LDRQui:
404 case AArch64::LDURQi:
405 case AArch64::LDR_ZXI:
406 return AArch64::LDPQi;
407 case AArch64::LDRQpre:
408 return AArch64::LDPQpre;
409 case AArch64::LDRWui:
410 case AArch64::LDURWi:
411 return AArch64::LDPWi;
412 case AArch64::LDRWpre:
413 return AArch64::LDPWpre;
414 case AArch64::LDRXui:
415 case AArch64::LDURXi:
416 return AArch64::LDPXi;
417 case AArch64::LDRXpre:
418 return AArch64::LDPXpre;
419 case AArch64::LDRSWui:
420 case AArch64::LDURSWi:
421 return AArch64::LDPSWi;
422 case AArch64::LDRSWpre:
423 return AArch64::LDPSWpre;
424 }
425}
426
429 unsigned LdOpc = LoadInst.getOpcode();
430 unsigned StOpc = StoreInst.getOpcode();
431 switch (LdOpc) {
432 default:
433 llvm_unreachable("Unsupported load instruction!");
434 case AArch64::LDRBBui:
435 return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
436 StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
437 case AArch64::LDURBBi:
438 return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
439 StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
440 case AArch64::LDRHHui:
441 return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
442 StOpc == AArch64::STRXui;
443 case AArch64::LDURHHi:
444 return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
445 StOpc == AArch64::STURXi;
446 case AArch64::LDRWui:
447 return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
448 case AArch64::LDURWi:
449 return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
450 case AArch64::LDRXui:
451 return StOpc == AArch64::STRXui;
452 case AArch64::LDURXi:
453 return StOpc == AArch64::STURXi;
454 }
455}
456
457static unsigned getPreIndexedOpcode(unsigned Opc) {
458 // FIXME: We don't currently support creating pre-indexed loads/stores when
459 // the load or store is the unscaled version. If we decide to perform such an
460 // optimization in the future the cases for the unscaled loads/stores will
461 // need to be added here.
462 switch (Opc) {
463 default:
464 llvm_unreachable("Opcode has no pre-indexed equivalent!");
465 case AArch64::STRBui:
466 return AArch64::STRBpre;
467 case AArch64::STRHui:
468 return AArch64::STRHpre;
469 case AArch64::STRSui:
470 return AArch64::STRSpre;
471 case AArch64::STRDui:
472 return AArch64::STRDpre;
473 case AArch64::STRQui:
474 return AArch64::STRQpre;
475 case AArch64::STRBBui:
476 return AArch64::STRBBpre;
477 case AArch64::STRHHui:
478 return AArch64::STRHHpre;
479 case AArch64::STRWui:
480 return AArch64::STRWpre;
481 case AArch64::STRXui:
482 return AArch64::STRXpre;
483 case AArch64::LDRBui:
484 return AArch64::LDRBpre;
485 case AArch64::LDRHui:
486 return AArch64::LDRHpre;
487 case AArch64::LDRSui:
488 return AArch64::LDRSpre;
489 case AArch64::LDRDui:
490 return AArch64::LDRDpre;
491 case AArch64::LDRQui:
492 return AArch64::LDRQpre;
493 case AArch64::LDRBBui:
494 return AArch64::LDRBBpre;
495 case AArch64::LDRHHui:
496 return AArch64::LDRHHpre;
497 case AArch64::LDRWui:
498 return AArch64::LDRWpre;
499 case AArch64::LDRXui:
500 return AArch64::LDRXpre;
501 case AArch64::LDRSWui:
502 return AArch64::LDRSWpre;
503 case AArch64::LDPSi:
504 return AArch64::LDPSpre;
505 case AArch64::LDPSWi:
506 return AArch64::LDPSWpre;
507 case AArch64::LDPDi:
508 return AArch64::LDPDpre;
509 case AArch64::LDPQi:
510 return AArch64::LDPQpre;
511 case AArch64::LDPWi:
512 return AArch64::LDPWpre;
513 case AArch64::LDPXi:
514 return AArch64::LDPXpre;
515 case AArch64::STPSi:
516 return AArch64::STPSpre;
517 case AArch64::STPDi:
518 return AArch64::STPDpre;
519 case AArch64::STPQi:
520 return AArch64::STPQpre;
521 case AArch64::STPWi:
522 return AArch64::STPWpre;
523 case AArch64::STPXi:
524 return AArch64::STPXpre;
525 case AArch64::STGi:
526 return AArch64::STGPreIndex;
527 case AArch64::STZGi:
528 return AArch64::STZGPreIndex;
529 case AArch64::ST2Gi:
530 return AArch64::ST2GPreIndex;
531 case AArch64::STZ2Gi:
532 return AArch64::STZ2GPreIndex;
533 case AArch64::STGPi:
534 return AArch64::STGPpre;
535 }
536}
537
538static unsigned getBaseAddressOpcode(unsigned Opc) {
539 // TODO: Add more index address stores.
540 switch (Opc) {
541 default:
542 llvm_unreachable("Opcode has no base address equivalent!");
543 case AArch64::LDRBroX:
544 return AArch64::LDRBui;
545 case AArch64::LDRBBroX:
546 return AArch64::LDRBBui;
547 case AArch64::LDRSBXroX:
548 return AArch64::LDRSBXui;
549 case AArch64::LDRSBWroX:
550 return AArch64::LDRSBWui;
551 case AArch64::LDRHroX:
552 return AArch64::LDRHui;
553 case AArch64::LDRHHroX:
554 return AArch64::LDRHHui;
555 case AArch64::LDRSHXroX:
556 return AArch64::LDRSHXui;
557 case AArch64::LDRSHWroX:
558 return AArch64::LDRSHWui;
559 case AArch64::LDRWroX:
560 return AArch64::LDRWui;
561 case AArch64::LDRSroX:
562 return AArch64::LDRSui;
563 case AArch64::LDRSWroX:
564 return AArch64::LDRSWui;
565 case AArch64::LDRDroX:
566 return AArch64::LDRDui;
567 case AArch64::LDRXroX:
568 return AArch64::LDRXui;
569 case AArch64::LDRQroX:
570 return AArch64::LDRQui;
571 }
572}
573
574static unsigned getPostIndexedOpcode(unsigned Opc) {
575 switch (Opc) {
576 default:
577 llvm_unreachable("Opcode has no post-indexed wise equivalent!");
578 case AArch64::STRBui:
579 return AArch64::STRBpost;
580 case AArch64::STRHui:
581 return AArch64::STRHpost;
582 case AArch64::STRSui:
583 case AArch64::STURSi:
584 return AArch64::STRSpost;
585 case AArch64::STRDui:
586 case AArch64::STURDi:
587 return AArch64::STRDpost;
588 case AArch64::STRQui:
589 case AArch64::STURQi:
590 return AArch64::STRQpost;
591 case AArch64::STRBBui:
592 return AArch64::STRBBpost;
593 case AArch64::STRHHui:
594 return AArch64::STRHHpost;
595 case AArch64::STRWui:
596 case AArch64::STURWi:
597 return AArch64::STRWpost;
598 case AArch64::STRXui:
599 case AArch64::STURXi:
600 return AArch64::STRXpost;
601 case AArch64::LDRBui:
602 return AArch64::LDRBpost;
603 case AArch64::LDRHui:
604 return AArch64::LDRHpost;
605 case AArch64::LDRSui:
606 case AArch64::LDURSi:
607 return AArch64::LDRSpost;
608 case AArch64::LDRDui:
609 case AArch64::LDURDi:
610 return AArch64::LDRDpost;
611 case AArch64::LDRQui:
612 case AArch64::LDURQi:
613 return AArch64::LDRQpost;
614 case AArch64::LDRBBui:
615 return AArch64::LDRBBpost;
616 case AArch64::LDRHHui:
617 return AArch64::LDRHHpost;
618 case AArch64::LDRWui:
619 case AArch64::LDURWi:
620 return AArch64::LDRWpost;
621 case AArch64::LDRXui:
622 case AArch64::LDURXi:
623 return AArch64::LDRXpost;
624 case AArch64::LDRSWui:
625 return AArch64::LDRSWpost;
626 case AArch64::LDPSi:
627 return AArch64::LDPSpost;
628 case AArch64::LDPSWi:
629 return AArch64::LDPSWpost;
630 case AArch64::LDPDi:
631 return AArch64::LDPDpost;
632 case AArch64::LDPQi:
633 return AArch64::LDPQpost;
634 case AArch64::LDPWi:
635 return AArch64::LDPWpost;
636 case AArch64::LDPXi:
637 return AArch64::LDPXpost;
638 case AArch64::STPSi:
639 return AArch64::STPSpost;
640 case AArch64::STPDi:
641 return AArch64::STPDpost;
642 case AArch64::STPQi:
643 return AArch64::STPQpost;
644 case AArch64::STPWi:
645 return AArch64::STPWpost;
646 case AArch64::STPXi:
647 return AArch64::STPXpost;
648 case AArch64::STGi:
649 return AArch64::STGPostIndex;
650 case AArch64::STZGi:
651 return AArch64::STZGPostIndex;
652 case AArch64::ST2Gi:
653 return AArch64::ST2GPostIndex;
654 case AArch64::STZ2Gi:
655 return AArch64::STZ2GPostIndex;
656 case AArch64::STGPi:
657 return AArch64::STGPpost;
658 }
659}
660
662
663 unsigned OpcA = FirstMI.getOpcode();
664 unsigned OpcB = MI.getOpcode();
665
666 switch (OpcA) {
667 default:
668 return false;
669 case AArch64::STRSpre:
670 return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
671 case AArch64::STRDpre:
672 return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
673 case AArch64::STRQpre:
674 return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
675 case AArch64::STRWpre:
676 return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
677 case AArch64::STRXpre:
678 return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
679 case AArch64::LDRSpre:
680 return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
681 case AArch64::LDRDpre:
682 return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
683 case AArch64::LDRQpre:
684 return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
685 case AArch64::LDRWpre:
686 return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
687 case AArch64::LDRXpre:
688 return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
689 case AArch64::LDRSWpre:
690 return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);
691 }
692}
693
694// Returns the scale and offset range of pre/post indexed variants of MI.
695static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
696 int &MinOffset, int &MaxOffset) {
697 bool IsPaired = AArch64InstrInfo::isPairedLdSt(MI);
698 bool IsTagStore = isTagStore(MI);
699 // ST*G and all paired ldst have the same scale in pre/post-indexed variants
700 // as in the "unsigned offset" variant.
701 // All other pre/post indexed ldst instructions are unscaled.
702 Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;
703
704 if (IsPaired) {
705 MinOffset = -64;
706 MaxOffset = 63;
707 } else {
708 MinOffset = -256;
709 MaxOffset = 255;
710 }
711}
712
714 unsigned PairedRegOp = 0) {
715 assert(PairedRegOp < 2 && "Unexpected register operand idx.");
716 bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
717 if (IsPreLdSt)
718 PairedRegOp += 1;
719 unsigned Idx =
720 AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
721 return MI.getOperand(Idx);
722}
723
726 const AArch64InstrInfo *TII) {
727 assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
728 int LoadSize = TII->getMemScale(LoadInst);
729 int StoreSize = TII->getMemScale(StoreInst);
730 int UnscaledStOffset =
731 TII->hasUnscaledLdStOffset(StoreInst)
734 int UnscaledLdOffset =
735 TII->hasUnscaledLdStOffset(LoadInst)
738 return (UnscaledStOffset <= UnscaledLdOffset) &&
739 (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
740}
741
743 unsigned Opc = MI.getOpcode();
744 return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
745 isNarrowStore(Opc)) &&
746 getLdStRegOp(MI).getReg() == AArch64::WZR;
747}
748
750 switch (MI.getOpcode()) {
751 default:
752 return false;
753 // Scaled instructions.
754 case AArch64::LDRBBui:
755 case AArch64::LDRHHui:
756 case AArch64::LDRWui:
757 case AArch64::LDRXui:
758 // Unscaled instructions.
759 case AArch64::LDURBBi:
760 case AArch64::LDURHHi:
761 case AArch64::LDURWi:
762 case AArch64::LDURXi:
763 return true;
764 }
765}
766
768 unsigned Opc = MI.getOpcode();
769 switch (Opc) {
770 default:
771 return false;
772 // Scaled instructions.
773 case AArch64::STRBui:
774 case AArch64::STRHui:
775 case AArch64::STRSui:
776 case AArch64::STRDui:
777 case AArch64::STRQui:
778 case AArch64::STRXui:
779 case AArch64::STRWui:
780 case AArch64::STRHHui:
781 case AArch64::STRBBui:
782 case AArch64::LDRBui:
783 case AArch64::LDRHui:
784 case AArch64::LDRSui:
785 case AArch64::LDRDui:
786 case AArch64::LDRQui:
787 case AArch64::LDRXui:
788 case AArch64::LDRWui:
789 case AArch64::LDRHHui:
790 case AArch64::LDRBBui:
791 case AArch64::STGi:
792 case AArch64::STZGi:
793 case AArch64::ST2Gi:
794 case AArch64::STZ2Gi:
795 case AArch64::STGPi:
796 // Unscaled instructions.
797 case AArch64::STURSi:
798 case AArch64::STURDi:
799 case AArch64::STURQi:
800 case AArch64::STURWi:
801 case AArch64::STURXi:
802 case AArch64::LDURSi:
803 case AArch64::LDURDi:
804 case AArch64::LDURQi:
805 case AArch64::LDURWi:
806 case AArch64::LDURXi:
807 // Paired instructions.
808 case AArch64::LDPSi:
809 case AArch64::LDPSWi:
810 case AArch64::LDPDi:
811 case AArch64::LDPQi:
812 case AArch64::LDPWi:
813 case AArch64::LDPXi:
814 case AArch64::STPSi:
815 case AArch64::STPDi:
816 case AArch64::STPQi:
817 case AArch64::STPWi:
818 case AArch64::STPXi:
819 // Make sure this is a reg+imm (as opposed to an address reloc).
821 return false;
822
823 // When using stack tagging, simple sp+imm loads and stores are not
824 // tag-checked, but pre- and post-indexed versions of them are, so we can't
825 // replace the former with the latter. This transformation would be valid
826 // if the load/store accesses an untagged stack slot, but we don't have
827 // that information available after frame indices have been eliminated.
828 if (AFI.isMTETagged() &&
829 AArch64InstrInfo::getLdStBaseOp(MI).getReg() == AArch64::SP)
830 return false;
831
832 return true;
833 }
834}
835
836// Make sure this is a reg+reg Ld/St
837static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
838 unsigned Opc = MI.getOpcode();
839 switch (Opc) {
840 default:
841 return false;
842 // Scaled instructions.
843 // TODO: Add more index address stores.
844 case AArch64::LDRBroX:
845 case AArch64::LDRBBroX:
846 case AArch64::LDRSBXroX:
847 case AArch64::LDRSBWroX:
848 Scale = 1;
849 return true;
850 case AArch64::LDRHroX:
851 case AArch64::LDRHHroX:
852 case AArch64::LDRSHXroX:
853 case AArch64::LDRSHWroX:
854 Scale = 2;
855 return true;
856 case AArch64::LDRWroX:
857 case AArch64::LDRSroX:
858 case AArch64::LDRSWroX:
859 Scale = 4;
860 return true;
861 case AArch64::LDRDroX:
862 case AArch64::LDRXroX:
863 Scale = 8;
864 return true;
865 case AArch64::LDRQroX:
866 Scale = 16;
867 return true;
868 }
869}
870
872 switch (MO.getParent()->getOpcode()) {
873 default:
874 return MO.isRenamable();
875 case AArch64::ORRWrs:
876 case AArch64::ADDWri:
877 return true;
878 }
879}
880
882AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
884 const LdStPairFlags &Flags) {
886 "Expected promotable zero stores.");
887
888 MachineBasicBlock::iterator E = I->getParent()->end();
890 // If NextI is the second of the two instructions to be merged, we need
891 // to skip one further. Either way we merge will invalidate the iterator,
892 // and we don't need to scan the new instruction, as it's a pairwise
893 // instruction, which we're not considering for further action anyway.
894 if (NextI == MergeMI)
895 NextI = next_nodbg(NextI, E);
896
897 unsigned Opc = I->getOpcode();
898 unsigned MergeMIOpc = MergeMI->getOpcode();
899 bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
900 bool IsMergedMIScaled = !TII->hasUnscaledLdStOffset(MergeMIOpc);
901 int OffsetStride = IsScaled ? TII->getMemScale(*I) : 1;
902 int MergeMIOffsetStride = IsMergedMIScaled ? TII->getMemScale(*MergeMI) : 1;
903
904 bool MergeForward = Flags.getMergeForward();
905 // Insert our new paired instruction after whichever of the paired
906 // instructions MergeForward indicates.
907 MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
908 // Also based on MergeForward is from where we copy the base register operand
909 // so we get the flags compatible with the input code.
910 const MachineOperand &BaseRegOp =
911 MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI)
912 : AArch64InstrInfo::getLdStBaseOp(*I);
913
914 // Which register is Rt and which is Rt2 depends on the offset order.
915 int64_t IOffsetInBytes =
916 AArch64InstrInfo::getLdStOffsetOp(*I).getImm() * OffsetStride;
917 int64_t MIOffsetInBytes =
919 MergeMIOffsetStride;
920 // Select final offset based on the offset order.
921 int64_t OffsetImm;
922 if (IOffsetInBytes > MIOffsetInBytes)
923 OffsetImm = MIOffsetInBytes;
924 else
925 OffsetImm = IOffsetInBytes;
926
927 int NewOpcode = getMatchingWideOpcode(Opc);
928 // Adjust final offset on scaled stores because the new instruction
929 // has a different scale.
930 if (!TII->hasUnscaledLdStOffset(NewOpcode)) {
931 int NewOffsetStride = TII->getMemScale(NewOpcode);
932 assert(((OffsetImm % NewOffsetStride) == 0) &&
933 "Offset should be a multiple of the store memory scale");
934 OffsetImm = OffsetImm / NewOffsetStride;
935 }
936
937 // Construct the new instruction.
938 DebugLoc DL = I->getDebugLoc();
939 MachineBasicBlock *MBB = I->getParent();
940 MachineInstrBuilder MIB;
941 MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(NewOpcode))
942 .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
943 .add(BaseRegOp)
944 .addImm(OffsetImm)
945 .cloneMergedMemRefs({&*I, &*MergeMI})
946 .setMIFlags(I->mergeFlagsWith(*MergeMI));
947 (void)MIB;
948
949 LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n ");
950 LLVM_DEBUG(I->print(dbgs()));
951 LLVM_DEBUG(dbgs() << " ");
952 LLVM_DEBUG(MergeMI->print(dbgs()));
953 LLVM_DEBUG(dbgs() << " with instruction:\n ");
954 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
955 LLVM_DEBUG(dbgs() << "\n");
956
957 // Erase the old instructions.
958 I->eraseFromParent();
959 MergeMI->eraseFromParent();
960 return NextI;
961}
962
963// Apply Fn to all instructions between MI and the beginning of the block, until
964// a def for DefReg is reached. Returns true, iff Fn returns true for all
965// visited instructions. Stop after visiting Limit iterations.
967 const TargetRegisterInfo *TRI, unsigned Limit,
968 std::function<bool(MachineInstr &, bool)> &Fn) {
969 auto MBB = MI.getParent();
970 for (MachineInstr &I :
971 instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
972 if (!Limit)
973 return false;
974 --Limit;
975
976 bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
977 return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
978 TRI->regsOverlap(MOP.getReg(), DefReg);
979 });
980 if (!Fn(I, isDef))
981 return false;
982 if (isDef)
983 break;
984 }
985 return true;
986}
987
989 const TargetRegisterInfo *TRI) {
990
991 for (const MachineOperand &MOP : phys_regs_and_masks(MI))
992 if (MOP.isReg() && MOP.isKill())
993 Units.removeReg(MOP.getReg());
994
995 for (const MachineOperand &MOP : phys_regs_and_masks(MI))
996 if (MOP.isReg() && !MOP.isKill())
997 Units.addReg(MOP.getReg());
998}
999
1000/// This function will add a new entry into the debugValueSubstitutions table
1001/// when two instruction have been merged into a new one represented by \p
1002/// MergedInstr.
1004 unsigned InstrNumToSet,
1005 MachineInstr &OriginalInstr,
1006 MachineInstr &MergedInstr) {
1007
1008 // Figure out the Operand Index of the destination register of the
1009 // OriginalInstr in the new MergedInstr.
1010 auto Reg = OriginalInstr.getOperand(0).getReg();
1011 unsigned OperandNo = 0;
1012 bool RegFound = false;
1013 for (const auto Op : MergedInstr.operands()) {
1014 if (Op.getReg() == Reg) {
1015 RegFound = true;
1016 break;
1017 }
1018 OperandNo++;
1019 }
1020
1021 if (RegFound)
1022 MF->makeDebugValueSubstitution({OriginalInstr.peekDebugInstrNum(), 0},
1023 {InstrNumToSet, OperandNo});
1024}
1025
1027AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
1029 const LdStPairFlags &Flags) {
1030 MachineBasicBlock::iterator E = I->getParent()->end();
1032 // If NextI is the second of the two instructions to be merged, we need
1033 // to skip one further. Either way we merge will invalidate the iterator,
1034 // and we don't need to scan the new instruction, as it's a pairwise
1035 // instruction, which we're not considering for further action anyway.
1036 if (NextI == Paired)
1037 NextI = next_nodbg(NextI, E);
1038
1039 int SExtIdx = Flags.getSExtIdx();
1040 unsigned Opc =
1041 SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
1042 bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
1043 int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;
1044
1045 bool MergeForward = Flags.getMergeForward();
1046
1047 std::optional<MCPhysReg> RenameReg = Flags.getRenameReg();
1048 if (RenameReg) {
1049 MCRegister RegToRename = getLdStRegOp(*I).getReg();
1050 DefinedInBB.addReg(*RenameReg);
1051
1052 // Return the sub/super register for RenameReg, matching the size of
1053 // OriginalReg.
1054 auto GetMatchingSubReg =
1055 [this, RenameReg](const TargetRegisterClass *C) -> MCPhysReg {
1056 for (MCPhysReg SubOrSuper :
1057 TRI->sub_and_superregs_inclusive(*RenameReg)) {
1058 if (C->contains(SubOrSuper))
1059 return SubOrSuper;
1060 }
1061 llvm_unreachable("Should have found matching sub or super register!");
1062 };
1063
1064 std::function<bool(MachineInstr &, bool)> UpdateMIs =
1065 [this, RegToRename, GetMatchingSubReg, MergeForward](MachineInstr &MI,
1066 bool IsDef) {
1067 if (IsDef) {
1068 bool SeenDef = false;
1069 for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
1070 MachineOperand &MOP = MI.getOperand(OpIdx);
1071 // Rename the first explicit definition and all implicit
1072 // definitions matching RegToRename.
1073 if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1074 (!MergeForward || !SeenDef ||
1075 (MOP.isDef() && MOP.isImplicit())) &&
1076 TRI->regsOverlap(MOP.getReg(), RegToRename)) {
1077 assert((MOP.isImplicit() ||
1078 (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
1079 "Need renamable operands");
1080 Register MatchingReg;
1081 if (const TargetRegisterClass *RC =
1082 MI.getRegClassConstraint(OpIdx, TII, TRI))
1083 MatchingReg = GetMatchingSubReg(RC);
1084 else {
1085 if (!isRewritableImplicitDef(MOP))
1086 continue;
1087 MatchingReg = GetMatchingSubReg(
1088 TRI->getMinimalPhysRegClass(MOP.getReg()));
1089 }
1090 MOP.setReg(MatchingReg);
1091 SeenDef = true;
1092 }
1093 }
1094 } else {
1095 for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
1096 MachineOperand &MOP = MI.getOperand(OpIdx);
1097 if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1098 TRI->regsOverlap(MOP.getReg(), RegToRename)) {
1099 assert((MOP.isImplicit() ||
1100 (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
1101 "Need renamable operands");
1102 Register MatchingReg;
1103 if (const TargetRegisterClass *RC =
1104 MI.getRegClassConstraint(OpIdx, TII, TRI))
1105 MatchingReg = GetMatchingSubReg(RC);
1106 else
1107 MatchingReg = GetMatchingSubReg(
1108 TRI->getMinimalPhysRegClass(MOP.getReg()));
1109 assert(MatchingReg != AArch64::NoRegister &&
1110 "Cannot find matching regs for renaming");
1111 MOP.setReg(MatchingReg);
1112 }
1113 }
1114 }
1115 LLVM_DEBUG(dbgs() << "Renamed " << MI);
1116 return true;
1117 };
1118 forAllMIsUntilDef(MergeForward ? *I : *Paired->getPrevNode(), RegToRename,
1119 TRI, UINT32_MAX, UpdateMIs);
1120
1121#if !defined(NDEBUG)
1122 // For forward merging store:
1123 // Make sure the register used for renaming is not used between the
1124 // paired instructions. That would trash the content before the new
1125 // paired instruction.
1126 MCPhysReg RegToCheck = *RenameReg;
1127 // For backward merging load:
1128 // Make sure the register being renamed is not used between the
1129 // paired instructions. That would trash the content after the new
1130 // paired instruction.
1131 if (!MergeForward)
1132 RegToCheck = RegToRename;
1133 for (auto &MI :
1134 iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>(
1135 MergeForward ? std::next(I) : I,
1136 MergeForward ? std::next(Paired) : Paired))
1137 assert(all_of(MI.operands(),
1138 [this, RegToCheck](const MachineOperand &MOP) {
1139 return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1140 MOP.isUndef() ||
1141 !TRI->regsOverlap(MOP.getReg(), RegToCheck);
1142 }) &&
1143 "Rename register used between paired instruction, trashing the "
1144 "content");
1145#endif
1146 }
1147
1148 // Insert our new paired instruction after whichever of the paired
1149 // instructions MergeForward indicates.
1150 MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
1151 // Also based on MergeForward is from where we copy the base register operand
1152 // so we get the flags compatible with the input code.
1153 const MachineOperand &BaseRegOp =
1154 MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired)
1155 : AArch64InstrInfo::getLdStBaseOp(*I);
1156
1158 int PairedOffset = AArch64InstrInfo::getLdStOffsetOp(*Paired).getImm();
1159 bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
1160 if (IsUnscaled != PairedIsUnscaled) {
1161 // We're trying to pair instructions that differ in how they are scaled. If
1162 // I is scaled then scale the offset of Paired accordingly. Otherwise, do
1163 // the opposite (i.e., make Paired's offset unscaled).
1164 int MemSize = TII->getMemScale(*Paired);
1165 if (PairedIsUnscaled) {
1166 // If the unscaled offset isn't a multiple of the MemSize, we can't
1167 // pair the operations together.
1168 assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
1169 "Offset should be a multiple of the stride!");
1170 PairedOffset /= MemSize;
1171 } else {
1172 PairedOffset *= MemSize;
1173 }
1174 }
1175
1176 // Which register is Rt and which is Rt2 depends on the offset order.
1177 // However, for pre load/stores the Rt should be the one of the pre
1178 // load/store.
1179 MachineInstr *RtMI, *Rt2MI;
1180 if (Offset == PairedOffset + OffsetStride &&
1182 RtMI = &*Paired;
1183 Rt2MI = &*I;
1184 // Here we swapped the assumption made for SExtIdx.
1185 // I.e., we turn ldp I, Paired into ldp Paired, I.
1186 // Update the index accordingly.
1187 if (SExtIdx != -1)
1188 SExtIdx = (SExtIdx + 1) % 2;
1189 } else {
1190 RtMI = &*I;
1191 Rt2MI = &*Paired;
1192 }
1193 int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
1194 // Scale the immediate offset, if necessary.
1195 if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
1196 assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
1197 "Unscaled offset cannot be scaled.");
1198 OffsetImm /= TII->getMemScale(*RtMI);
1199 }
1200
1201 // Construct the new instruction.
1202 MachineInstrBuilder MIB;
1203 DebugLoc DL = I->getDebugLoc();
1204 MachineBasicBlock *MBB = I->getParent();
1205 MachineOperand RegOp0 = getLdStRegOp(*RtMI);
1206 MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
1207 MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;
1208 // Kill flags may become invalid when moving stores for pairing.
1209 if (RegOp0.isUse()) {
1210 if (!MergeForward) {
1211 // Clear kill flags on store if moving upwards. Example:
1212 // STRWui kill %w0, ...
1213 // USE %w1
1214 // STRWui kill %w1 ; need to clear kill flag when moving STRWui upwards
1215 // We are about to move the store of w1, so its kill flag may become
1216 // invalid; not the case for w0.
1217 // Since w1 is used between the stores, the kill flag on w1 is cleared
1218 // after merging.
1219 // STPWi kill %w0, %w1, ...
1220 // USE %w1
1221 for (auto It = std::next(I); It != Paired && PairedRegOp.isKill(); ++It)
1222 if (It->readsRegister(PairedRegOp.getReg(), TRI))
1223 PairedRegOp.setIsKill(false);
1224 } else {
1225 // Clear kill flags of the first stores register. Example:
1226 // STRWui %w1, ...
1227 // USE kill %w1 ; need to clear kill flag when moving STRWui downwards
1228 // STRW %w0
1230 for (MachineInstr &MI :
1231 make_range(std::next(I->getIterator()), Paired->getIterator()))
1232 MI.clearRegisterKills(Reg, TRI);
1233 }
1234 }
1235
1236 unsigned int MatchPairOpcode = getMatchingPairOpcode(Opc);
1237 MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(MatchPairOpcode));
1238
1239 // Adds the pre-index operand for pre-indexed ld/st pairs.
1240 if (AArch64InstrInfo::isPreLdSt(*RtMI))
1241 MIB.addReg(BaseRegOp.getReg(), RegState::Define);
1242
1243 MIB.add(RegOp0)
1244 .add(RegOp1)
1245 .add(BaseRegOp)
1246 .addImm(OffsetImm)
1247 .cloneMergedMemRefs({&*I, &*Paired})
1248 .setMIFlags(I->mergeFlagsWith(*Paired));
1249
1250 (void)MIB;
1251
1252 LLVM_DEBUG(
1253 dbgs() << "Creating pair load/store. Replacing instructions:\n ");
1254 LLVM_DEBUG(I->print(dbgs()));
1255 LLVM_DEBUG(dbgs() << " ");
1256 LLVM_DEBUG(Paired->print(dbgs()));
1257 LLVM_DEBUG(dbgs() << " with instruction:\n ");
1258 if (SExtIdx != -1) {
1259 // Generate the sign extension for the proper result of the ldp.
1260 // I.e., with X1, that would be:
1261 // %w1 = KILL %w1, implicit-def %x1
1262 // %x1 = SBFMXri killed %x1, 0, 31
1263 MachineOperand &DstMO = MIB->getOperand(SExtIdx);
1264 // Right now, DstMO has the extended register, since it comes from an
1265 // extended opcode.
1266 Register DstRegX = DstMO.getReg();
1267 // Get the W variant of that register.
1268 Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
1269 // Update the result of LDP to use the W instead of the X variant.
1270 DstMO.setReg(DstRegW);
1271 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1272 LLVM_DEBUG(dbgs() << "\n");
1273 // Make the machine verifier happy by providing a definition for
1274 // the X register.
1275 // Insert this definition right after the generated LDP, i.e., before
1276 // InsertionPoint.
1277 MachineInstrBuilder MIBKill =
1278 BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
1279 .addReg(DstRegW)
1280 .addReg(DstRegX, RegState::Define);
1281 MIBKill->getOperand(2).setImplicit();
1282 // Create the sign extension.
1283 MachineInstrBuilder MIBSXTW =
1284 BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
1285 .addReg(DstRegX)
1286 .addImm(0)
1287 .addImm(31);
1288 (void)MIBSXTW;
1289
1290 // In the case of a sign-extend, where we have something like:
1291 // debugValueSubstitutions:[]
1292 // $w1 = LDRWui $x0, 1, debug-instr-number 1
1293 // DBG_INSTR_REF !7, dbg-instr-ref(1, 0), debug-location !9
1294 // $x0 = LDRSWui $x0, 0, debug-instr-number 2
1295 // DBG_INSTR_REF !8, dbg-instr-ref(2, 0), debug-location !9
1296
1297 // It will be converted to:
1298 // debugValueSubstitutions:[]
1299 // $w0, $w1 = LDPWi $x0, 0
1300 // $w0 = KILL $w0, implicit-def $x0
1301 // $x0 = SBFMXri $x0, 0, 31
1302 // DBG_INSTR_REF !7, dbg-instr-ref(1, 0), debug-location !9
1303 // DBG_INSTR_REF !8, dbg-instr-ref(2, 0), debug-location !9
1304
1305 // We want the final result to look like:
1306 // debugValueSubstitutions:
1307 // - { srcinst: 1, srcop: 0, dstinst: 4, dstop: 1, subreg: 0 }
1308 // - { srcinst: 2, srcop: 0, dstinst: 3, dstop: 0, subreg: 0 }
1309 // $w0, $w1 = LDPWi $x0, 0, debug-instr-number 4
1310 // $w0 = KILL $w0, implicit-def $x0
1311 // $x0 = SBFMXri $x0, 0, 31, debug-instr-number 3
1312 // DBG_INSTR_REF !7, dbg-instr-ref(1, 0), debug-location !9
1313 // DBG_INSTR_REF !8, dbg-instr-ref(2, 0), debug-location !9
1314
1315 // $x0 is where the final value is stored, so the sign extend (SBFMXri)
1316 // instruction contains the final value we care about we give it a new
1317 // debug-instr-number 3. Whereas, $w1 contains the final value that we care
1318 // about, therefore the LDP instruction is also given a new
1319 // debug-instr-number 4. We have to add these substitutions to the
1320 // debugValueSubstitutions table. However, we also have to ensure that the
1321 // OpIndex that pointed to debug-instr-number 1 gets updated to 1, because
1322 // $w1 is the second operand of the LDP instruction.
1323
1324 if (I->peekDebugInstrNum()) {
1325 // If I is the instruction which got sign extended and has a
1326 // debug-instr-number, give the SBFMXri instruction a new
1327 // debug-instr-number, and update the debugValueSubstitutions table with
1328 // the new debug-instr-number and OpIndex pair. Otherwise, give the Merged
1329 // instruction a new debug-instr-number, and update the
1330 // debugValueSubstitutions table with the new debug-instr-number and
1331 // OpIndex pair.
1332 unsigned NewInstrNum;
1333 if (DstRegX == I->getOperand(0).getReg()) {
1334 NewInstrNum = MIBSXTW->getDebugInstrNum();
1335 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *I,
1336 *MIBSXTW);
1337 } else {
1338 NewInstrNum = MIB->getDebugInstrNum();
1339 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *I, *MIB);
1340 }
1341 }
1342 if (Paired->peekDebugInstrNum()) {
1343 // If Paired is the instruction which got sign extended and has a
1344 // debug-instr-number, give the SBFMXri instruction a new
1345 // debug-instr-number, and update the debugValueSubstitutions table with
1346 // the new debug-instr-number and OpIndex pair. Otherwise, give the Merged
1347 // instruction a new debug-instr-number, and update the
1348 // debugValueSubstitutions table with the new debug-instr-number and
1349 // OpIndex pair.
1350 unsigned NewInstrNum;
1351 if (DstRegX == Paired->getOperand(0).getReg()) {
1352 NewInstrNum = MIBSXTW->getDebugInstrNum();
1353 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *Paired,
1354 *MIBSXTW);
1355 } else {
1356 NewInstrNum = MIB->getDebugInstrNum();
1357 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *Paired,
1358 *MIB);
1359 }
1360 }
1361
1362 LLVM_DEBUG(dbgs() << " Extend operand:\n ");
1363 LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
1364 } else if (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) {
1365 // We are combining SVE fill/spill to LDP/STP, so we need to use the Q
1366 // variant of the registers.
1367 MachineOperand &MOp0 = MIB->getOperand(0);
1368 MachineOperand &MOp1 = MIB->getOperand(1);
1369 assert(AArch64::ZPRRegClass.contains(MOp0.getReg()) &&
1370 AArch64::ZPRRegClass.contains(MOp1.getReg()) && "Invalid register.");
1371 MOp0.setReg(AArch64::Q0 + (MOp0.getReg() - AArch64::Z0));
1372 MOp1.setReg(AArch64::Q0 + (MOp1.getReg() - AArch64::Z0));
1373 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1374 } else {
1375
1376 // In the case that the merge doesn't result in a sign-extend, if we have
1377 // something like:
1378 // debugValueSubstitutions:[]
1379 // $x1 = LDRXui $x0, 1, debug-instr-number 1
1380 // DBG_INSTR_REF !13, dbg-instr-ref(1, 0), debug-location !11
1381 // $x0 = LDRXui killed $x0, 0, debug-instr-number 2
1382 // DBG_INSTR_REF !14, dbg-instr-ref(2, 0), debug-location !11
1383
1384 // It will be converted to:
1385 // debugValueSubstitutions: []
1386 // $x0, $x1 = LDPXi $x0, 0
1387 // DBG_INSTR_REF !12, dbg-instr-ref(1, 0), debug-location !14
1388 // DBG_INSTR_REF !13, dbg-instr-ref(2, 0), debug-location !14
1389
1390 // We want the final result to look like:
1391 // debugValueSubstitutions:
1392 // - { srcinst: 1, srcop: 0, dstinst: 3, dstop: 1, subreg: 0 }
1393 // - { srcinst: 2, srcop: 0, dstinst: 3, dstop: 0, subreg: 0 }
1394 // $x0, $x1 = LDPXi $x0, 0, debug-instr-number 3
1395 // DBG_INSTR_REF !12, dbg-instr-ref(1, 0), debug-location !14
1396 // DBG_INSTR_REF !12, dbg-instr-ref(2, 0), debug-location !14
1397
1398 // Here all that needs to be done is, that the LDP instruction needs to be
1399 // updated with a new debug-instr-number, we then need to add entries into
1400 // the debugSubstitutions table to map the old instr-refs to the new ones.
1401
1402 // Assign new DebugInstrNum to the Paired instruction.
1403 if (I->peekDebugInstrNum()) {
1404 unsigned NewDebugInstrNum = MIB->getDebugInstrNum();
1405 addDebugSubstitutionsToTable(MBB->getParent(), NewDebugInstrNum, *I,
1406 *MIB);
1407 }
1408 if (Paired->peekDebugInstrNum()) {
1409 unsigned NewDebugInstrNum = MIB->getDebugInstrNum();
1410 addDebugSubstitutionsToTable(MBB->getParent(), NewDebugInstrNum, *Paired,
1411 *MIB);
1412 }
1413
1414 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1415 }
1416 LLVM_DEBUG(dbgs() << "\n");
1417
1418 if (MergeForward)
1419 for (const MachineOperand &MOP : phys_regs_and_masks(*I))
1420 if (MOP.isReg() && MOP.isKill())
1421 DefinedInBB.addReg(MOP.getReg());
1422
1423 // Copy over any implicit-def operands. This is like MI.copyImplicitOps, but
1424 // only copies implicit defs and makes sure that each operand is only added
1425 // once in case of duplicates.
1426 auto CopyImplicitOps = [&](MachineBasicBlock::iterator MI1,
1428 SmallSetVector<Register, 4> Ops;
1429 for (const MachineOperand &MO :
1430 llvm::drop_begin(MI1->operands(), MI1->getDesc().getNumOperands()))
1431 if (MO.isReg() && MO.isImplicit() && MO.isDef())
1432 Ops.insert(MO.getReg());
1433 for (const MachineOperand &MO :
1434 llvm::drop_begin(MI2->operands(), MI2->getDesc().getNumOperands()))
1435 if (MO.isReg() && MO.isImplicit() && MO.isDef())
1436 Ops.insert(MO.getReg());
1437 for (auto Op : Ops)
1438 MIB.addDef(Op, RegState::Implicit);
1439 };
1440 CopyImplicitOps(I, Paired);
1441
1442 // Erase the old instructions.
1443 I->eraseFromParent();
1444 Paired->eraseFromParent();
1445
1446 return NextI;
1447}
1448
1450AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
1453 next_nodbg(LoadI, LoadI->getParent()->end());
1454
1455 int LoadSize = TII->getMemScale(*LoadI);
1456 int StoreSize = TII->getMemScale(*StoreI);
1457 Register LdRt = getLdStRegOp(*LoadI).getReg();
1458 const MachineOperand &StMO = getLdStRegOp(*StoreI);
1459 Register StRt = getLdStRegOp(*StoreI).getReg();
1460 bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
1461
1462 assert((IsStoreXReg ||
1463 TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
1464 "Unexpected RegClass");
1465
1466 MachineInstr *BitExtMI;
1467 if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
1468 // Remove the load, if the destination register of the loads is the same
1469 // register for stored value.
1470 if (StRt == LdRt && LoadSize == 8) {
1471 for (MachineInstr &MI : make_range(StoreI->getIterator(),
1472 LoadI->getIterator())) {
1473 if (MI.killsRegister(StRt, TRI)) {
1474 MI.clearRegisterKills(StRt, TRI);
1475 break;
1476 }
1477 }
1478 LLVM_DEBUG(dbgs() << "Remove load instruction:\n ");
1479 LLVM_DEBUG(LoadI->print(dbgs()));
1480 LLVM_DEBUG(dbgs() << "\n");
1481 LoadI->eraseFromParent();
1482 return NextI;
1483 }
1484 // Replace the load with a mov if the load and store are in the same size.
1485 BitExtMI =
1486 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1487 TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
1488 .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
1489 .add(StMO)
1491 .setMIFlags(LoadI->getFlags());
1492 } else {
1493 // FIXME: Currently we disable this transformation in big-endian targets as
1494 // performance and correctness are verified only in little-endian.
1495 if (!Subtarget->isLittleEndian())
1496 return NextI;
1497 bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
1498 assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
1499 "Unsupported ld/st match");
1500 assert(LoadSize <= StoreSize && "Invalid load size");
1501 int UnscaledLdOffset =
1502 IsUnscaled
1504 : AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() * LoadSize;
1505 int UnscaledStOffset =
1506 IsUnscaled
1508 : AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() * StoreSize;
1509 int Width = LoadSize * 8;
1510 Register DestReg =
1511 IsStoreXReg ? Register(TRI->getMatchingSuperReg(
1512 LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
1513 : LdRt;
1514
1515 assert((UnscaledLdOffset >= UnscaledStOffset &&
1516 (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
1517 "Invalid offset");
1518
1519 int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
1520 int Imms = Immr + Width - 1;
1521 if (UnscaledLdOffset == UnscaledStOffset) {
1522 uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
1523 | ((Immr) << 6) // immr
1524 | ((Imms) << 0) // imms
1525 ;
1526
1527 BitExtMI =
1528 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1529 TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
1530 DestReg)
1531 .add(StMO)
1532 .addImm(AndMaskEncoded)
1533 .setMIFlags(LoadI->getFlags());
1534 } else if (IsStoreXReg && Imms == 31) {
1535 // Use the 32 bit variant of UBFM if it's the LSR alias of the
1536 // instruction.
1537 assert(Immr <= Imms && "Expected LSR alias of UBFM");
1538 BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1539 TII->get(AArch64::UBFMWri),
1540 TRI->getSubReg(DestReg, AArch64::sub_32))
1541 .addReg(TRI->getSubReg(StRt, AArch64::sub_32))
1542 .addImm(Immr)
1543 .addImm(Imms)
1544 .setMIFlags(LoadI->getFlags());
1545 } else {
1546 BitExtMI =
1547 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1548 TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
1549 DestReg)
1550 .add(StMO)
1551 .addImm(Immr)
1552 .addImm(Imms)
1553 .setMIFlags(LoadI->getFlags());
1554 }
1555 }
1556
1557 // Clear kill flags between store and load.
1558 for (MachineInstr &MI : make_range(StoreI->getIterator(),
1559 BitExtMI->getIterator()))
1560 if (MI.killsRegister(StRt, TRI)) {
1561 MI.clearRegisterKills(StRt, TRI);
1562 break;
1563 }
1564
1565 LLVM_DEBUG(dbgs() << "Promoting load by replacing :\n ");
1566 LLVM_DEBUG(StoreI->print(dbgs()));
1567 LLVM_DEBUG(dbgs() << " ");
1568 LLVM_DEBUG(LoadI->print(dbgs()));
1569 LLVM_DEBUG(dbgs() << " with instructions:\n ");
1570 LLVM_DEBUG(StoreI->print(dbgs()));
1571 LLVM_DEBUG(dbgs() << " ");
1572 LLVM_DEBUG((BitExtMI)->print(dbgs()));
1573 LLVM_DEBUG(dbgs() << "\n");
1574
1575 // Erase the old instructions.
1576 LoadI->eraseFromParent();
1577 return NextI;
1578}
1579
1580static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
1581 // Convert the byte-offset used by unscaled into an "element" offset used
1582 // by the scaled pair load/store instructions.
1583 if (IsUnscaled) {
1584 // If the byte-offset isn't a multiple of the stride, there's no point
1585 // trying to match it.
1586 if (Offset % OffsetStride)
1587 return false;
1588 Offset /= OffsetStride;
1589 }
1590 return Offset <= 63 && Offset >= -64;
1591}
1592
1593// Do alignment, specialized to power of 2 and for signed ints,
1594// avoiding having to do a C-style cast from uint_64t to int when
1595// using alignTo from include/llvm/Support/MathExtras.h.
1596// FIXME: Move this function to include/MathExtras.h?
1597static int alignTo(int Num, int PowOf2) {
1598 return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
1599}
1600
1601static bool mayAlias(MachineInstr &MIa,
1603 AliasAnalysis *AA) {
1604 for (MachineInstr *MIb : MemInsns) {
1605 if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) {
1606 LLVM_DEBUG(dbgs() << "Aliasing with: "; MIb->dump());
1607 return true;
1608 }
1609 }
1610
1611 LLVM_DEBUG(dbgs() << "No aliases found\n");
1612 return false;
1613}
1614
1615bool AArch64LoadStoreOpt::findMatchingStore(
1616 MachineBasicBlock::iterator I, unsigned Limit,
1618 MachineBasicBlock::iterator B = I->getParent()->begin();
1620 MachineInstr &LoadMI = *I;
1622
1623 // If the load is the first instruction in the block, there's obviously
1624 // not any matching store.
1625 if (MBBI == B)
1626 return false;
1627
1628 // Track which register units have been modified and used between the first
1629 // insn and the second insn.
1630 ModifiedRegUnits.clear();
1631 UsedRegUnits.clear();
1632
1633 unsigned Count = 0;
1634 do {
1635 MBBI = prev_nodbg(MBBI, B);
1636 MachineInstr &MI = *MBBI;
1637
1638 // Don't count transient instructions towards the search limit since there
1639 // may be different numbers of them if e.g. debug information is present.
1640 if (!MI.isTransient())
1641 ++Count;
1642
1643 // If the load instruction reads directly from the address to which the
1644 // store instruction writes and the stored value is not modified, we can
1645 // promote the load. Since we do not handle stores with pre-/post-index,
1646 // it's unnecessary to check if BaseReg is modified by the store itself.
1647 // Also we can't handle stores without an immediate offset operand,
1648 // while the operand might be the address for a global variable.
1649 if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
1652 isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
1653 ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
1654 StoreI = MBBI;
1655 return true;
1656 }
1657
1658 if (MI.isCall())
1659 return false;
1660
1661 // Update modified / uses register units.
1662 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1663
1664 // Otherwise, if the base register is modified, we have no match, so
1665 // return early.
1666 if (!ModifiedRegUnits.available(BaseReg))
1667 return false;
1668
1669 // If we encounter a store aliased with the load, return early.
1670 if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA*/ false))
1671 return false;
1672 } while (MBBI != B && Count < Limit);
1673 return false;
1674}
1675
1676static bool needsWinCFI(const MachineFunction *MF) {
1677 return MF->getTarget().getMCAsmInfo().usesWindowsCFI() &&
1679}
1680
1681// Returns true if FirstMI and MI are candidates for merging or pairing.
1682// Otherwise, returns false.
1684 LdStPairFlags &Flags,
1685 const AArch64InstrInfo *TII) {
1686 // If this is volatile or if pairing is suppressed, not a candidate.
1687 if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
1688 return false;
1689
1690 // We should have already checked FirstMI for pair suppression and volatility.
1691 assert(!FirstMI.hasOrderedMemoryRef() &&
1692 !TII->isLdStPairSuppressed(FirstMI) &&
1693 "FirstMI shouldn't get here if either of these checks are true.");
1694
1695 if (needsWinCFI(MI.getMF()) && (MI.getFlag(MachineInstr::FrameSetup) ||
1697 return false;
1698
1699 unsigned OpcA = FirstMI.getOpcode();
1700 unsigned OpcB = MI.getOpcode();
1701
1702 // Opcodes match: If the opcodes are pre ld/st there is nothing more to check.
1703 if (OpcA == OpcB)
1704 return !AArch64InstrInfo::isPreLdSt(FirstMI);
1705
1706 // Bail out if one of the opcodes is SVE fill/spill, as we currently don't
1707 // allow pairing them with other instructions.
1708 if (OpcA == AArch64::LDR_ZXI || OpcA == AArch64::STR_ZXI ||
1709 OpcB == AArch64::LDR_ZXI || OpcB == AArch64::STR_ZXI)
1710 return false;
1711
1712 // Two pre ld/st of different opcodes cannot be merged either
1714 return false;
1715
1716 // Try to match a sign-extended load/store with a zero-extended load/store.
1717 bool IsValidLdStrOpc, PairIsValidLdStrOpc;
1718 unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
1719 assert(IsValidLdStrOpc &&
1720 "Given Opc should be a Load or Store with an immediate");
1721 // OpcA will be the first instruction in the pair.
1722 if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
1723 Flags.setSExtIdx(NonSExtOpc == OpcA ? 1 : 0);
1724 return true;
1725 }
1726
1727 // If the second instruction isn't even a mergable/pairable load/store, bail
1728 // out.
1729 if (!PairIsValidLdStrOpc)
1730 return false;
1731
1732 // Narrow stores do not have a matching pair opcodes, so constrain their
1733 // merging to zero stores.
1734 if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
1735 return getLdStRegOp(FirstMI).getReg() == AArch64::WZR &&
1736 getLdStRegOp(MI).getReg() == AArch64::WZR &&
1737 TII->getMemScale(FirstMI) == TII->getMemScale(MI);
1738
1739 // The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and
1740 // LDR<S,D,Q,W,X,SW>pre-LDR<S,D,Q,W,X,SW>ui
1741 // are candidate pairs that can be merged.
1742 if (isPreLdStPairCandidate(FirstMI, MI))
1743 return true;
1744
1745 // Try to match an unscaled load/store with a scaled load/store.
1746 return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) &&
1748
1749 // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
1750}
1751
1752static bool canRenameMOP(const MachineOperand &MOP,
1753 const TargetRegisterInfo *TRI) {
1754 if (MOP.isReg()) {
1755 auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
1756 // Renaming registers with multiple disjunct sub-registers (e.g. the
1757 // result of a LD3) means that all sub-registers are renamed, potentially
1758 // impacting other instructions we did not check. Bail out.
1759 // Note that this relies on the structure of the AArch64 register file. In
1760 // particular, a subregister cannot be written without overwriting the
1761 // whole register.
1762 if (RegClass->HasDisjunctSubRegs && RegClass->CoveredBySubRegs &&
1763 (TRI->getSubRegisterClass(RegClass, AArch64::dsub0) ||
1764 TRI->getSubRegisterClass(RegClass, AArch64::qsub0) ||
1765 TRI->getSubRegisterClass(RegClass, AArch64::zsub0))) {
1766 LLVM_DEBUG(
1767 dbgs()
1768 << " Cannot rename operands with multiple disjunct subregisters ("
1769 << MOP << ")\n");
1770 return false;
1771 }
1772
1773 // We cannot rename arbitrary implicit-defs, the specific rule to rewrite
1774 // them must be known. For example, in ORRWrs the implicit-def
1775 // corresponds to the result register.
1776 if (MOP.isImplicit() && MOP.isDef()) {
1777 if (!isRewritableImplicitDef(MOP))
1778 return false;
1779 return TRI->isSuperOrSubRegisterEq(
1780 MOP.getParent()->getOperand(0).getReg(), MOP.getReg());
1781 }
1782 }
1783 return MOP.isImplicit() ||
1784 (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
1785}
1786
1787static bool
1790 const TargetRegisterInfo *TRI) {
1791 if (!FirstMI.mayStore())
1792 return false;
1793
1794 // Check if we can find an unused register which we can use to rename
1795 // the register used by the first load/store.
1796
1797 auto RegToRename = getLdStRegOp(FirstMI).getReg();
1798 // For now, we only rename if the store operand gets killed at the store.
1799 if (!getLdStRegOp(FirstMI).isKill() &&
1800 !any_of(FirstMI.operands(),
1801 [TRI, RegToRename](const MachineOperand &MOP) {
1802 return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1803 MOP.isImplicit() && MOP.isKill() &&
1804 TRI->regsOverlap(RegToRename, MOP.getReg());
1805 })) {
1806 LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI);
1807 return false;
1808 }
1809
1810 bool FoundDef = false;
1811
1812 // For each instruction between FirstMI and the previous def for RegToRename,
1813 // we
1814 // * check if we can rename RegToRename in this instruction
1815 // * collect the registers used and required register classes for RegToRename.
1816 std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
1817 bool IsDef) {
1818 LLVM_DEBUG(dbgs() << "Checking " << MI);
1819 // Currently we do not try to rename across frame-setup instructions.
1820 if (MI.getFlag(MachineInstr::FrameSetup)) {
1821 LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions "
1822 << "currently\n");
1823 return false;
1824 }
1825
1826 UsedInBetween.accumulate(MI);
1827
1828 // For a definition, check that we can rename the definition and exit the
1829 // loop.
1830 FoundDef = IsDef;
1831
1832 // For defs, check if we can rename the first def of RegToRename.
1833 if (FoundDef) {
1834 // For some pseudo instructions, we might not generate code in the end
1835 // (e.g. KILL) and we would end up without a correct def for the rename
1836 // register.
1837 // TODO: This might be overly conservative and we could handle those cases
1838 // in multiple ways:
1839 // 1. Insert an extra copy, to materialize the def.
1840 // 2. Skip pseudo-defs until we find an non-pseudo def.
1841 if (MI.isPseudo()) {
1842 LLVM_DEBUG(dbgs() << " Cannot rename pseudo/bundle instruction\n");
1843 return false;
1844 }
1845
1846 for (auto &MOP : MI.operands()) {
1847 if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||
1848 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1849 continue;
1850 if (!canRenameMOP(MOP, TRI)) {
1851 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1852 return false;
1853 }
1854 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1855 }
1856 return true;
1857 } else {
1858 for (auto &MOP : MI.operands()) {
1859 if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1860 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1861 continue;
1862
1863 if (!canRenameMOP(MOP, TRI)) {
1864 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1865 return false;
1866 }
1867 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1868 }
1869 }
1870 return true;
1871 };
1872
1873 if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
1874 return false;
1875
1876 if (!FoundDef) {
1877 LLVM_DEBUG(dbgs() << " Did not find definition for register in BB\n");
1878 return false;
1879 }
1880 return true;
1881}
1882
1883// We want to merge the second load into the first by rewriting the usages of
1884// the same reg between first (incl.) and second (excl.). We don't need to care
1885// about any insns before FirstLoad or after SecondLoad.
1886// 1. The second load writes new value into the same reg.
1887// - The renaming is impossible to impact later use of the reg.
1888// - The second load always trash the value written by the first load which
1889// means the reg must be killed before the second load.
1890// 2. The first load must be a def for the same reg so we don't need to look
1891// into anything before it.
1893 MachineInstr &FirstLoad, MachineInstr &SecondLoad,
1894 LiveRegUnits &UsedInBetween,
1896 const TargetRegisterInfo *TRI) {
1897 if (FirstLoad.isPseudo())
1898 return false;
1899
1900 UsedInBetween.accumulate(FirstLoad);
1901 auto RegToRename = getLdStRegOp(FirstLoad).getReg();
1902 bool Success = std::all_of(
1903 FirstLoad.getIterator(), SecondLoad.getIterator(),
1904 [&](MachineInstr &MI) {
1905 LLVM_DEBUG(dbgs() << "Checking " << MI);
1906 // Currently we do not try to rename across frame-setup instructions.
1907 if (MI.getFlag(MachineInstr::FrameSetup)) {
1908 LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions "
1909 << "currently\n");
1910 return false;
1911 }
1912
1913 for (auto &MOP : MI.operands()) {
1914 if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1915 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1916 continue;
1917 if (!canRenameMOP(MOP, TRI)) {
1918 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1919 return false;
1920 }
1921 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1922 }
1923
1924 return true;
1925 });
1926 return Success;
1927}
1928
1929// Check if we can find a physical register for renaming \p Reg. This register
1930// must:
1931// * not be defined already in \p DefinedInBB; DefinedInBB must contain all
1932// defined registers up to the point where the renamed register will be used,
1933// * not used in \p UsedInBetween; UsedInBetween must contain all accessed
1934// registers in the range the rename register will be used,
1935// * is available in all used register classes (checked using RequiredClasses).
1936static std::optional<MCPhysReg> tryToFindRegisterToRename(
1937 const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
1938 LiveRegUnits &UsedInBetween,
1940 const TargetRegisterInfo *TRI) {
1942
1943 // Checks if any sub- or super-register of PR is callee saved.
1944 auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
1945 return any_of(TRI->sub_and_superregs_inclusive(PR),
1946 [&MF, TRI](MCPhysReg SubOrSuper) {
1947 return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
1948 });
1949 };
1950
1951 // Check if PR or one of its sub- or super-registers can be used for all
1952 // required register classes.
1953 auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
1954 return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
1955 return any_of(
1956 TRI->sub_and_superregs_inclusive(PR),
1957 [C](MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); });
1958 });
1959 };
1960
1961 auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
1962 for (const MCPhysReg &PR : *RegClass) {
1963 if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
1964 !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
1965 CanBeUsedForAllClasses(PR)) {
1966 DefinedInBB.addReg(PR);
1967 LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
1968 << "\n");
1969 return {PR};
1970 }
1971 }
1972 LLVM_DEBUG(dbgs() << "No rename register found from "
1973 << TRI->getRegClassName(RegClass) << "\n");
1974 return std::nullopt;
1975}
1976
1977// For store pairs: returns a register from FirstMI to the beginning of the
1978// block that can be renamed.
1979// For load pairs: returns a register from FirstMI to MI that can be renamed.
1980static std::optional<MCPhysReg> findRenameRegForSameLdStRegPair(
1981 std::optional<bool> MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI,
1982 Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween,
1984 const TargetRegisterInfo *TRI) {
1985 std::optional<MCPhysReg> RenameReg;
1986 if (!DebugCounter::shouldExecute(RegRenamingCounter))
1987 return RenameReg;
1988
1989 auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
1990 MachineFunction &MF = *FirstMI.getParent()->getParent();
1991 if (!RegClass || !MF.getRegInfo().tracksLiveness())
1992 return RenameReg;
1993
1994 const bool IsLoad = FirstMI.mayLoad();
1995
1996 if (!MaybeCanRename) {
1997 if (IsLoad)
1998 MaybeCanRename = {canRenameUntilSecondLoad(FirstMI, MI, UsedInBetween,
1999 RequiredClasses, TRI)};
2000 else
2001 MaybeCanRename = {
2002 canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)};
2003 }
2004
2005 if (*MaybeCanRename) {
2006 RenameReg = tryToFindRegisterToRename(MF, Reg, DefinedInBB, UsedInBetween,
2007 RequiredClasses, TRI);
2008 }
2009 return RenameReg;
2010}
2011
2012/// Scan the instructions looking for a load/store that can be combined with the
2013/// current instruction into a wider equivalent or a load/store pair.
2015AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
2016 LdStPairFlags &Flags, unsigned Limit,
2017 bool FindNarrowMerge) {
2018 MachineBasicBlock::iterator E = I->getParent()->end();
2020 MachineBasicBlock::iterator MBBIWithRenameReg;
2021 MachineInstr &FirstMI = *I;
2022 MBBI = next_nodbg(MBBI, E);
2023
2024 bool MayLoad = FirstMI.mayLoad();
2025 bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
2026 Register Reg = getLdStRegOp(FirstMI).getReg();
2029 int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
2030 bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
2031
2032 std::optional<bool> MaybeCanRename;
2033 if (!EnableRenaming)
2034 MaybeCanRename = {false};
2035
2036 SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
2037 LiveRegUnits UsedInBetween;
2038 UsedInBetween.init(*TRI);
2039
2040 Flags.clearRenameReg();
2041
2042 // Track which register units have been modified and used between the first
2043 // insn (inclusive) and the second insn.
2044 ModifiedRegUnits.clear();
2045 UsedRegUnits.clear();
2046
2047 // Remember any instructions that read/write memory between FirstMI and MI.
2048 SmallVector<MachineInstr *, 4> MemInsns;
2049
2050 LLVM_DEBUG(dbgs() << "Find match for: "; FirstMI.dump());
2051 for (unsigned Count = 0; MBBI != E && Count < Limit;
2052 MBBI = next_nodbg(MBBI, E)) {
2053 MachineInstr &MI = *MBBI;
2054 LLVM_DEBUG(dbgs() << "Analysing 2nd insn: "; MI.dump());
2055
2056 UsedInBetween.accumulate(MI);
2057
2058 // Don't count transient instructions towards the search limit since there
2059 // may be different numbers of them if e.g. debug information is present.
2060 if (!MI.isTransient())
2061 ++Count;
2062
2063 Flags.setSExtIdx(-1);
2064 if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
2066 assert(MI.mayLoadOrStore() && "Expected memory operation.");
2067 // If we've found another instruction with the same opcode, check to see
2068 // if the base and offset are compatible with our starting instruction.
2069 // These instructions all have scaled immediate operands, so we just
2070 // check for +1/-1. Make sure to check the new instruction offset is
2071 // actually an immediate and not a symbolic reference destined for
2072 // a relocation.
2075 bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
2076 if (IsUnscaled != MIIsUnscaled) {
2077 // We're trying to pair instructions that differ in how they are scaled.
2078 // If FirstMI is scaled then scale the offset of MI accordingly.
2079 // Otherwise, do the opposite (i.e., make MI's offset unscaled).
2080 int MemSize = TII->getMemScale(MI);
2081 if (MIIsUnscaled) {
2082 // If the unscaled offset isn't a multiple of the MemSize, we can't
2083 // pair the operations together: bail and keep looking.
2084 if (MIOffset % MemSize) {
2085 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2086 UsedRegUnits, TRI);
2087 MemInsns.push_back(&MI);
2088 continue;
2089 }
2090 MIOffset /= MemSize;
2091 } else {
2092 MIOffset *= MemSize;
2093 }
2094 }
2095
2096 bool IsPreLdSt = isPreLdStPairCandidate(FirstMI, MI);
2097
2098 if (BaseReg == MIBaseReg) {
2099 // If the offset of the second ld/st is not equal to the size of the
2100 // destination register it can’t be paired with a pre-index ld/st
2101 // pair. Additionally if the base reg is used or modified the operations
2102 // can't be paired: bail and keep looking.
2103 if (IsPreLdSt) {
2104 bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
2105 bool IsBaseRegUsed = !UsedRegUnits.available(
2107 bool IsBaseRegModified = !ModifiedRegUnits.available(
2109 // If the stored value and the address of the second instruction is
2110 // the same, it needs to be using the updated register and therefore
2111 // it must not be folded.
2112 bool IsMIRegTheSame =
2113 TRI->regsOverlap(getLdStRegOp(MI).getReg(),
2115 if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
2116 IsMIRegTheSame) {
2117 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2118 UsedRegUnits, TRI);
2119 MemInsns.push_back(&MI);
2120 continue;
2121 }
2122 } else {
2123 if ((Offset != MIOffset + OffsetStride) &&
2124 (Offset + OffsetStride != MIOffset)) {
2125 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2126 UsedRegUnits, TRI);
2127 MemInsns.push_back(&MI);
2128 continue;
2129 }
2130 }
2131
2132 int MinOffset = Offset < MIOffset ? Offset : MIOffset;
2133 if (FindNarrowMerge) {
2134 // If the alignment requirements of the scaled wide load/store
2135 // instruction can't express the offset of the scaled narrow input,
2136 // bail and keep looking. For promotable zero stores, allow only when
2137 // the stored value is the same (i.e., WZR).
2138 if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
2139 (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
2140 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2141 UsedRegUnits, TRI);
2142 MemInsns.push_back(&MI);
2143 continue;
2144 }
2145 } else {
2146 // Pairwise instructions have a 7-bit signed offset field. Single
2147 // insns have a 12-bit unsigned offset field. If the resultant
2148 // immediate offset of merging these instructions is out of range for
2149 // a pairwise instruction, bail and keep looking.
2150 if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
2151 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2152 UsedRegUnits, TRI);
2153 MemInsns.push_back(&MI);
2154 LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, "
2155 << "keep looking.\n");
2156 continue;
2157 }
2158 // If the alignment requirements of the paired (scaled) instruction
2159 // can't express the offset of the unscaled input, bail and keep
2160 // looking.
2161 if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
2162 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2163 UsedRegUnits, TRI);
2164 MemInsns.push_back(&MI);
2166 << "Offset doesn't fit due to alignment requirements, "
2167 << "keep looking.\n");
2168 continue;
2169 }
2170 }
2171
2172 // If the BaseReg has been modified, then we cannot do the optimization.
2173 // For example, in the following pattern
2174 // ldr x1 [x2]
2175 // ldr x2 [x3]
2176 // ldr x4 [x2, #8],
2177 // the first and third ldr cannot be converted to ldp x1, x4, [x2]
2178 if (!ModifiedRegUnits.available(BaseReg))
2179 return E;
2180
2181 const bool SameLoadReg = MayLoad && TRI->isSuperOrSubRegisterEq(
2183
2184 // If the Rt of the second instruction (destination register of the
2185 // load) was not modified or used between the two instructions and none
2186 // of the instructions between the second and first alias with the
2187 // second, we can combine the second into the first.
2188 bool RtNotModified =
2189 ModifiedRegUnits.available(getLdStRegOp(MI).getReg());
2190 bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg &&
2191 !UsedRegUnits.available(getLdStRegOp(MI).getReg()));
2192
2193 LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n"
2194 << "Reg '" << getLdStRegOp(MI) << "' not modified: "
2195 << (RtNotModified ? "true" : "false") << "\n"
2196 << "Reg '" << getLdStRegOp(MI) << "' not used: "
2197 << (RtNotUsed ? "true" : "false") << "\n");
2198
2199 if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) {
2200 // For pairs loading into the same reg, try to find a renaming
2201 // opportunity to allow the renaming of Reg between FirstMI and MI
2202 // and combine MI into FirstMI; otherwise bail and keep looking.
2203 if (SameLoadReg) {
2204 std::optional<MCPhysReg> RenameReg =
2205 findRenameRegForSameLdStRegPair(MaybeCanRename, FirstMI, MI,
2206 Reg, DefinedInBB, UsedInBetween,
2207 RequiredClasses, TRI);
2208 if (!RenameReg) {
2209 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2210 UsedRegUnits, TRI);
2211 MemInsns.push_back(&MI);
2212 LLVM_DEBUG(dbgs() << "Can't find reg for renaming, "
2213 << "keep looking.\n");
2214 continue;
2215 }
2216 Flags.setRenameReg(*RenameReg);
2217 }
2218
2219 Flags.setMergeForward(false);
2220 if (!SameLoadReg)
2221 Flags.clearRenameReg();
2222 return MBBI;
2223 }
2224
2225 // Likewise, if the Rt of the first instruction is not modified or used
2226 // between the two instructions and none of the instructions between the
2227 // first and the second alias with the first, we can combine the first
2228 // into the second.
2229 RtNotModified = !(
2230 MayLoad && !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg()));
2231
2232 LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n"
2233 << "Reg '" << getLdStRegOp(FirstMI)
2234 << "' not modified: "
2235 << (RtNotModified ? "true" : "false") << "\n");
2236
2237 if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) {
2238 if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
2239 Flags.setMergeForward(true);
2240 Flags.clearRenameReg();
2241 return MBBI;
2242 }
2243
2244 std::optional<MCPhysReg> RenameReg = findRenameRegForSameLdStRegPair(
2245 MaybeCanRename, FirstMI, MI, Reg, DefinedInBB, UsedInBetween,
2246 RequiredClasses, TRI);
2247 if (RenameReg) {
2248 Flags.setMergeForward(true);
2249 Flags.setRenameReg(*RenameReg);
2250 MBBIWithRenameReg = MBBI;
2251 }
2252 }
2253 LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to "
2254 << "interference in between, keep looking.\n");
2255 }
2256 }
2257
2258 if (Flags.getRenameReg())
2259 return MBBIWithRenameReg;
2260
2261 // If the instruction wasn't a matching load or store. Stop searching if we
2262 // encounter a call instruction that might modify memory.
2263 if (MI.isCall()) {
2264 LLVM_DEBUG(dbgs() << "Found a call, stop looking.\n");
2265 return E;
2266 }
2267
2268 // Update modified / uses register units.
2269 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2270
2271 // Otherwise, if the base register is modified, we have no match, so
2272 // return early.
2273 if (!ModifiedRegUnits.available(BaseReg)) {
2274 LLVM_DEBUG(dbgs() << "Base reg is modified, stop looking.\n");
2275 return E;
2276 }
2277
2278 // Update list of instructions that read/write memory.
2279 if (MI.mayLoadOrStore())
2280 MemInsns.push_back(&MI);
2281 }
2282 return E;
2283}
2284
2287 assert((MI.getOpcode() == AArch64::SUBXri ||
2288 MI.getOpcode() == AArch64::ADDXri) &&
2289 "Expected a register update instruction");
2290 auto End = MI.getParent()->end();
2291 if (MaybeCFI == End ||
2292 MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
2293 !(MI.getFlag(MachineInstr::FrameSetup) ||
2294 MI.getFlag(MachineInstr::FrameDestroy)) ||
2295 MI.getOperand(0).getReg() != AArch64::SP)
2296 return End;
2297
2298 const MachineFunction &MF = *MI.getParent()->getParent();
2299 unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
2300 const MCCFIInstruction &CFI = MF.getFrameInstructions()[CFIIndex];
2301 switch (CFI.getOperation()) {
2304 return MaybeCFI;
2305 default:
2306 return End;
2307 }
2308}
2309
2310std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(
2312 bool IsForward, bool IsPreIdx, bool MergeEither) {
2313 assert((Update->getOpcode() == AArch64::ADDXri ||
2314 Update->getOpcode() == AArch64::SUBXri) &&
2315 "Unexpected base register update instruction to merge!");
2316 MachineBasicBlock::iterator E = I->getParent()->end();
2318
2319 // If updating the SP and the following instruction is CFA offset related CFI,
2320 // make sure the CFI follows the SP update either by merging at the location
2321 // of the update or by moving the CFI after the merged instruction. If unable
2322 // to do so, bail.
2323 MachineBasicBlock::iterator InsertPt = I;
2324 if (IsForward) {
2325 assert(IsPreIdx);
2326 if (auto CFI = maybeMoveCFI(*Update, next_nodbg(Update, E)); CFI != E) {
2327 if (MergeEither) {
2328 InsertPt = Update;
2329 } else {
2330 // Take care not to reorder CFIs.
2331 if (std::any_of(std::next(CFI), I, [](const auto &Insn) {
2332 return Insn.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
2333 }))
2334 return std::nullopt;
2335
2336 MachineBasicBlock *MBB = InsertPt->getParent();
2337 MBB->splice(std::next(InsertPt), MBB, CFI);
2338 }
2339 }
2340 }
2341
2342 // Return the instruction following the merged instruction, which is
2343 // the instruction following our unmerged load. Unless that's the add/sub
2344 // instruction we're merging, in which case it's the one after that.
2345 if (NextI == Update)
2346 NextI = next_nodbg(NextI, E);
2347
2348 int Value = Update->getOperand(2).getImm();
2349 assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
2350 "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
2351 if (Update->getOpcode() == AArch64::SUBXri)
2352 Value = -Value;
2353
2354 unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
2356 MachineInstrBuilder MIB;
2357 int Scale, MinOffset, MaxOffset;
2358 getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
2360 // Non-paired instruction.
2361 MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2362 TII->get(NewOpc))
2363 .add(Update->getOperand(0))
2364 .add(getLdStRegOp(*I))
2366 .addImm(Value / Scale)
2367 .setMemRefs(I->memoperands())
2368 .setMIFlags(I->mergeFlagsWith(*Update));
2369 } else {
2370 // Paired instruction.
2371 MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2372 TII->get(NewOpc))
2373 .add(Update->getOperand(0))
2374 .add(getLdStRegOp(*I, 0))
2375 .add(getLdStRegOp(*I, 1))
2377 .addImm(Value / Scale)
2378 .setMemRefs(I->memoperands())
2379 .setMIFlags(I->mergeFlagsWith(*Update));
2380 }
2381
2382 if (IsPreIdx) {
2383 ++NumPreFolded;
2384 LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store.");
2385 } else {
2386 ++NumPostFolded;
2387 LLVM_DEBUG(dbgs() << "Creating post-indexed load/store.");
2388 }
2389 LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
2390 LLVM_DEBUG(I->print(dbgs()));
2391 LLVM_DEBUG(dbgs() << " ");
2392 LLVM_DEBUG(Update->print(dbgs()));
2393 LLVM_DEBUG(dbgs() << " with instruction:\n ");
2394 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
2395 LLVM_DEBUG(dbgs() << "\n");
2396
2397 // Erase the old instructions for the block.
2398 I->eraseFromParent();
2399 Update->eraseFromParent();
2400
2401 return NextI;
2402}
2403
2405AArch64LoadStoreOpt::mergeConstOffsetInsn(MachineBasicBlock::iterator I,
2407 unsigned Offset, int Scale) {
2408 assert((Update->getOpcode() == AArch64::MOVKWi) &&
2409 "Unexpected const mov instruction to merge!");
2410 MachineBasicBlock::iterator E = I->getParent()->end();
2412 MachineBasicBlock::iterator PrevI = prev_nodbg(Update, E);
2413 MachineInstr &MemMI = *I;
2414 unsigned Mask = (1 << 12) * Scale - 1;
2415 unsigned Low = Offset & Mask;
2416 unsigned High = Offset - Low;
2419 MachineInstrBuilder AddMIB, MemMIB;
2420
2421 // Add IndexReg, BaseReg, High (the BaseReg may be SP)
2422 AddMIB =
2423 BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri))
2424 .addDef(IndexReg)
2425 .addUse(BaseReg)
2426 .addImm(High >> 12) // shifted value
2427 .addImm(12); // shift 12
2428 (void)AddMIB;
2429 // Ld/St DestReg, IndexReg, Imm12
2430 unsigned NewOpc = getBaseAddressOpcode(I->getOpcode());
2431 MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
2432 .add(getLdStRegOp(MemMI))
2434 .addImm(Low / Scale)
2435 .setMemRefs(I->memoperands())
2436 .setMIFlags(I->mergeFlagsWith(*Update));
2437 (void)MemMIB;
2438
2439 ++NumConstOffsetFolded;
2440 LLVM_DEBUG(dbgs() << "Creating base address load/store.\n");
2441 LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
2442 LLVM_DEBUG(PrevI->print(dbgs()));
2443 LLVM_DEBUG(dbgs() << " ");
2444 LLVM_DEBUG(Update->print(dbgs()));
2445 LLVM_DEBUG(dbgs() << " ");
2446 LLVM_DEBUG(I->print(dbgs()));
2447 LLVM_DEBUG(dbgs() << " with instruction:\n ");
2448 LLVM_DEBUG(((MachineInstr *)AddMIB)->print(dbgs()));
2449 LLVM_DEBUG(dbgs() << " ");
2450 LLVM_DEBUG(((MachineInstr *)MemMIB)->print(dbgs()));
2451 LLVM_DEBUG(dbgs() << "\n");
2452
2453 // Erase the old instructions for the block.
2454 I->eraseFromParent();
2455 PrevI->eraseFromParent();
2456 Update->eraseFromParent();
2457
2458 return NextI;
2459}
2460
2461bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
2462 MachineInstr &MI,
2463 unsigned BaseReg, int Offset) {
2464 switch (MI.getOpcode()) {
2465 default:
2466 break;
2467 case AArch64::SUBXri:
2468 case AArch64::ADDXri:
2469 // Make sure it's a vanilla immediate operand, not a relocation or
2470 // anything else we can't handle.
2471 if (!MI.getOperand(2).isImm())
2472 break;
2473 // Watch out for 1 << 12 shifted value.
2474 if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
2475 break;
2476
2477 // The update instruction source and destination register must be the
2478 // same as the load/store base register.
2479 if (MI.getOperand(0).getReg() != BaseReg ||
2480 MI.getOperand(1).getReg() != BaseReg)
2481 break;
2482
2483 int UpdateOffset = MI.getOperand(2).getImm();
2484 if (MI.getOpcode() == AArch64::SUBXri)
2485 UpdateOffset = -UpdateOffset;
2486
2487 // The immediate must be a multiple of the scaling factor of the pre/post
2488 // indexed instruction.
2489 int Scale, MinOffset, MaxOffset;
2490 getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
2491 if (UpdateOffset % Scale != 0)
2492 break;
2493
2494 // Scaled offset must fit in the instruction immediate.
2495 int ScaledOffset = UpdateOffset / Scale;
2496 if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
2497 break;
2498
2499 // If we have a non-zero Offset, we check that it matches the amount
2500 // we're adding to the register.
2501 if (!Offset || Offset == UpdateOffset)
2502 return true;
2503 break;
2504 }
2505 return false;
2506}
2507
2508bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
2509 MachineInstr &MI,
2510 unsigned IndexReg,
2511 unsigned &Offset) {
2512 // The update instruction source and destination register must be the
2513 // same as the load/store index register.
2514 if (MI.getOpcode() == AArch64::MOVKWi &&
2515 TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) {
2516
2517 // movz + movk hold a large offset of a Ld/St instruction.
2518 MachineBasicBlock::iterator B = MI.getParent()->begin();
2520 // Skip the scene when the MI is the first instruction of a block.
2521 if (MBBI == B)
2522 return false;
2523 MBBI = prev_nodbg(MBBI, B);
2524 MachineInstr &MovzMI = *MBBI;
2525 // Make sure the MOVKWi and MOVZWi set the same register.
2526 if (MovzMI.getOpcode() == AArch64::MOVZWi &&
2527 MovzMI.getOperand(0).getReg() == MI.getOperand(0).getReg()) {
2528 unsigned Low = MovzMI.getOperand(1).getImm();
2529 unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm();
2530 Offset = High + Low;
2531 // 12-bit optionally shifted immediates are legal for adds.
2532 return Offset >> 24 == 0;
2533 }
2534 }
2535 return false;
2536}
2537
2538MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
2539 MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
2540 MachineBasicBlock::iterator E = I->getParent()->end();
2541 MachineInstr &MemMI = *I;
2543
2545 int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() *
2546 TII->getMemScale(MemMI);
2547
2548 // Scan forward looking for post-index opportunities. Updating instructions
2549 // can't be formed if the memory instruction doesn't have the offset we're
2550 // looking for.
2551 if (MIUnscaledOffset != UnscaledOffset)
2552 return E;
2553
2554 // If the base register overlaps a source/destination register, we can't
2555 // merge the update. This does not apply to tag store instructions which
2556 // ignore the address part of the source register.
2557 // This does not apply to STGPi as well, which does not have unpredictable
2558 // behavior in this case unlike normal stores, and always performs writeback
2559 // after reading the source register value.
2560 if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
2561 bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
2562 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
2563 Register DestReg = getLdStRegOp(MemMI, i).getReg();
2564 if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
2565 return E;
2566 }
2567 }
2568
2569 // Track which register units have been modified and used between the first
2570 // insn (inclusive) and the second insn.
2571 ModifiedRegUnits.clear();
2572 UsedRegUnits.clear();
2573 MBBI = next_nodbg(MBBI, E);
2574
2575 // We can't post-increment the stack pointer if any instruction between
2576 // the memory access (I) and the increment (MBBI) can access the memory
2577 // region defined by [SP, MBBI].
2578 const bool BaseRegSP = BaseReg == AArch64::SP;
2579 if (BaseRegSP && needsWinCFI(I->getMF())) {
2580 // FIXME: For now, we always block the optimization over SP in windows
2581 // targets as it requires to adjust the unwind/debug info, messing up
2582 // the unwind info can actually cause a miscompile.
2583 return E;
2584 }
2585
2586 unsigned Count = 0;
2587 MachineBasicBlock *CurMBB = I->getParent();
2588 // choice of next block to visit is liveins-based
2589 bool VisitSucc = CurMBB->getParent()->getRegInfo().tracksLiveness();
2590
2591 while (true) {
2592 for (MachineBasicBlock::iterator CurEnd = CurMBB->end();
2593 MBBI != CurEnd && Count < Limit; MBBI = next_nodbg(MBBI, CurEnd)) {
2594 MachineInstr &MI = *MBBI;
2595
2596 // Don't count transient instructions towards the search limit since there
2597 // may be different numbers of them if e.g. debug information is present.
2598 if (!MI.isTransient())
2599 ++Count;
2600
2601 // If we found a match, return it.
2602 if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
2603 return MBBI;
2604
2605 // Update the status of what the instruction clobbered and used.
2606 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
2607 TRI);
2608
2609 // Otherwise, if the base register is used or modified, we have no match,
2610 // so return early. If we are optimizing SP, do not allow instructions
2611 // that may load or store in between the load and the optimized value
2612 // update.
2613 if (!ModifiedRegUnits.available(BaseReg) ||
2614 !UsedRegUnits.available(BaseReg) ||
2615 (BaseRegSP && MBBI->mayLoadOrStore()))
2616 return E;
2617 }
2618
2619 if (!VisitSucc || Limit <= Count)
2620 break;
2621
2622 // Try to go downward to successors along a CF path w/o side enters
2623 // such that BaseReg is alive along it but not at its exits
2624 MachineBasicBlock *SuccToVisit = nullptr;
2625 unsigned LiveSuccCount = 0;
2626 for (MachineBasicBlock *Succ : CurMBB->successors()) {
2627 for (MCRegAliasIterator AI(BaseReg, TRI, true); AI.isValid(); ++AI) {
2628 if (Succ->isLiveIn(*AI)) {
2629 if (LiveSuccCount++)
2630 return E;
2631 if (Succ->pred_size() == 1)
2632 SuccToVisit = Succ;
2633 break;
2634 }
2635 }
2636 }
2637 if (!SuccToVisit)
2638 break;
2639 CurMBB = SuccToVisit;
2640 MBBI = CurMBB->begin();
2641 }
2642
2643 return E;
2644}
2645
2646MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
2647 MachineBasicBlock::iterator I, unsigned Limit, bool &MergeEither) {
2648 MachineBasicBlock::iterator B = I->getParent()->begin();
2649 MachineBasicBlock::iterator E = I->getParent()->end();
2650 MachineInstr &MemMI = *I;
2652 MachineFunction &MF = *MemMI.getMF();
2653
2656
2657 bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
2658 Register DestReg[] = {getLdStRegOp(MemMI, 0).getReg(),
2659 IsPairedInsn ? getLdStRegOp(MemMI, 1).getReg()
2660 : AArch64::NoRegister};
2661
2662 // If the load/store is the first instruction in the block, there's obviously
2663 // not any matching update. Ditto if the memory offset isn't zero.
2664 if (MBBI == B || Offset != 0)
2665 return E;
2666 // If the base register overlaps a destination register, we can't
2667 // merge the update.
2668 if (!isTagStore(MemMI)) {
2669 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i)
2670 if (DestReg[i] == BaseReg || TRI->isSubRegister(BaseReg, DestReg[i]))
2671 return E;
2672 }
2673
2674 const bool BaseRegSP = BaseReg == AArch64::SP;
2675 if (BaseRegSP && needsWinCFI(I->getMF())) {
2676 // FIXME: For now, we always block the optimization over SP in windows
2677 // targets as it requires to adjust the unwind/debug info, messing up
2678 // the unwind info can actually cause a miscompile.
2679 return E;
2680 }
2681
2682 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
2683 unsigned RedZoneSize =
2684 Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());
2685
2686 // Track which register units have been modified and used between the first
2687 // insn (inclusive) and the second insn.
2688 ModifiedRegUnits.clear();
2689 UsedRegUnits.clear();
2690 unsigned Count = 0;
2691 bool MemAccessBeforeSPPreInc = false;
2692 MergeEither = true;
2693 do {
2694 MBBI = prev_nodbg(MBBI, B);
2695 MachineInstr &MI = *MBBI;
2696
2697 // Don't count transient instructions towards the search limit since there
2698 // may be different numbers of them if e.g. debug information is present.
2699 if (!MI.isTransient())
2700 ++Count;
2701
2702 // If we found a match, return it.
2703 if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
2704 // Check that the update value is within our red zone limit (which may be
2705 // zero).
2706 if (MemAccessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)
2707 return E;
2708 return MBBI;
2709 }
2710
2711 // Update the status of what the instruction clobbered and used.
2712 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2713
2714 // Otherwise, if the base register is used or modified, we have no match, so
2715 // return early.
2716 if (!ModifiedRegUnits.available(BaseReg) ||
2717 !UsedRegUnits.available(BaseReg))
2718 return E;
2719
2720 // If we have a destination register (i.e. a load instruction) and a
2721 // destination register is used or modified, then we can only merge forward,
2722 // i.e. the combined instruction is put in the place of the memory
2723 // instruction. Same applies if we see a memory access or side effects.
2724 if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() ||
2725 (DestReg[0] != AArch64::NoRegister &&
2726 !(ModifiedRegUnits.available(DestReg[0]) &&
2727 UsedRegUnits.available(DestReg[0]))) ||
2728 (DestReg[1] != AArch64::NoRegister &&
2729 !(ModifiedRegUnits.available(DestReg[1]) &&
2730 UsedRegUnits.available(DestReg[1]))))
2731 MergeEither = false;
2732
2733 // Keep track if we have a memory access before an SP pre-increment, in this
2734 // case we need to validate later that the update amount respects the red
2735 // zone.
2736 if (BaseRegSP && MBBI->mayLoadOrStore())
2737 MemAccessBeforeSPPreInc = true;
2738 } while (MBBI != B && Count < Limit);
2739 return E;
2740}
2741
2743AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
2744 MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) {
2745 MachineBasicBlock::iterator B = I->getParent()->begin();
2746 MachineBasicBlock::iterator E = I->getParent()->end();
2747 MachineInstr &MemMI = *I;
2749
2750 // If the load is the first instruction in the block, there's obviously
2751 // not any matching load or store.
2752 if (MBBI == B)
2753 return E;
2754
2755 // Make sure the IndexReg is killed and the shift amount is zero.
2756 // TODO: Relex this restriction to extend, simplify processing now.
2757 if (!AArch64InstrInfo::getLdStOffsetOp(MemMI).isKill() ||
2758 !AArch64InstrInfo::getLdStAmountOp(MemMI).isImm() ||
2760 return E;
2761
2763
2764 // Track which register units have been modified and used between the first
2765 // insn (inclusive) and the second insn.
2766 ModifiedRegUnits.clear();
2767 UsedRegUnits.clear();
2768 unsigned Count = 0;
2769 do {
2770 MBBI = prev_nodbg(MBBI, B);
2771 MachineInstr &MI = *MBBI;
2772
2773 // Don't count transient instructions towards the search limit since there
2774 // may be different numbers of them if e.g. debug information is present.
2775 if (!MI.isTransient())
2776 ++Count;
2777
2778 // If we found a match, return it.
2779 if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) {
2780 return MBBI;
2781 }
2782
2783 // Update the status of what the instruction clobbered and used.
2784 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2785
2786 // Otherwise, if the index register is used or modified, we have no match,
2787 // so return early.
2788 if (!ModifiedRegUnits.available(IndexReg) ||
2789 !UsedRegUnits.available(IndexReg))
2790 return E;
2791
2792 } while (MBBI != B && Count < Limit);
2793 return E;
2794}
2795
2796bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
2798 MachineInstr &MI = *MBBI;
2799 // If this is a volatile load, don't mess with it.
2800 if (MI.hasOrderedMemoryRef())
2801 return false;
2802
2803 if (needsWinCFI(MI.getMF()) && MI.getFlag(MachineInstr::FrameDestroy))
2804 return false;
2805
2806 // Make sure this is a reg+imm.
2807 // FIXME: It is possible to extend it to handle reg+reg cases.
2809 return false;
2810
2811 // Look backward up to LdStLimit instructions.
2813 if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
2814 ++NumLoadsFromStoresPromoted;
2815 // Promote the load. Keeping the iterator straight is a
2816 // pain, so we let the merge routine tell us what the next instruction
2817 // is after it's done mucking about.
2818 MBBI = promoteLoadFromStore(MBBI, StoreI);
2819 return true;
2820 }
2821 return false;
2822}
2823
2824// Merge adjacent zero stores into a wider store.
2825bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
2827 assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
2828 MachineInstr &MI = *MBBI;
2829 MachineBasicBlock::iterator E = MI.getParent()->end();
2830
2831 if (!TII->isCandidateToMergeOrPair(MI))
2832 return false;
2833
2834 // Look ahead up to LdStLimit instructions for a mergeable instruction.
2835 LdStPairFlags Flags;
2837 findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
2838 if (MergeMI != E) {
2839 ++NumZeroStoresPromoted;
2840
2841 // Keeping the iterator straight is a pain, so we let the merge routine tell
2842 // us what the next instruction is after it's done mucking about.
2843 MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
2844 return true;
2845 }
2846 return false;
2847}
2848
2849// Find loads and stores that can be merged into a single load or store pair
2850// instruction.
2851bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
2852 MachineInstr &MI = *MBBI;
2853 MachineBasicBlock::iterator E = MI.getParent()->end();
2854
2855 if (!TII->isCandidateToMergeOrPair(MI))
2856 return false;
2857
2858 // If disable-ldp feature is opted, do not emit ldp.
2859 if (MI.mayLoad() && Subtarget->hasDisableLdp())
2860 return false;
2861
2862 // If disable-stp feature is opted, do not emit stp.
2863 if (MI.mayStore() && Subtarget->hasDisableStp())
2864 return false;
2865
2866 // Early exit if the offset is not possible to match. (6 bits of positive
2867 // range, plus allow an extra one in case we find a later insn that matches
2868 // with Offset-1)
2869 bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
2871 int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
2872 // Allow one more for offset.
2873 if (Offset > 0)
2874 Offset -= OffsetStride;
2875 if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
2876 return false;
2877
2878 // Look ahead up to LdStLimit instructions for a pairable instruction.
2879 LdStPairFlags Flags;
2881 findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
2882 if (Paired != E) {
2883 // Keeping the iterator straight is a pain, so we let the merge routine tell
2884 // us what the next instruction is after it's done mucking about.
2885 auto Prev = std::prev(MBBI);
2886
2887 // Fetch the memoperand of the load/store that is a candidate for
2888 // combination.
2889 MachineMemOperand *MemOp =
2890 MI.memoperands_empty() ? nullptr : MI.memoperands().front();
2891
2892 // If a load/store arrives and ldp/stp-aligned-only feature is opted, check
2893 // that the alignment of the source pointer is at least double the alignment
2894 // of the type.
2895 if ((MI.mayLoad() && Subtarget->hasLdpAlignedOnly()) ||
2896 (MI.mayStore() && Subtarget->hasStpAlignedOnly())) {
2897 // If there is no size/align information, cancel the transformation.
2898 if (!MemOp || !MemOp->getMemoryType().isValid()) {
2899 NumFailedAlignmentCheck++;
2900 return false;
2901 }
2902
2903 // Get the needed alignments to check them if
2904 // ldp-aligned-only/stp-aligned-only features are opted.
2905 uint64_t MemAlignment = MemOp->getAlign().value();
2906 uint64_t TypeAlignment =
2907 Align(MemOp->getSize().getValue().getKnownMinValue()).value();
2908
2909 if (MemAlignment < 2 * TypeAlignment) {
2910 NumFailedAlignmentCheck++;
2911 return false;
2912 }
2913 }
2914
2915 ++NumPairCreated;
2916 if (TII->hasUnscaledLdStOffset(MI))
2917 ++NumUnscaledPairCreated;
2918
2919 MBBI = mergePairedInsns(MBBI, Paired, Flags);
2920 // Collect liveness info for instructions between Prev and the new position
2921 // MBBI.
2922 for (auto I = std::next(Prev); I != MBBI; I++)
2923 updateDefinedRegisters(*I, DefinedInBB, TRI);
2924
2925 return true;
2926 }
2927 return false;
2928}
2929
2930bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
2932 MachineInstr &MI = *MBBI;
2933 MachineBasicBlock::iterator E = MI.getParent()->end();
2935
2936 // Do not form post-inc addressing mode for volatile accesses. Instructions
2937 // performing register writeback do not set a valid instruction syndrome,
2938 // making it impossible to handle MMIO in protected hypervisors.
2939 // Exclude accesses based on the stack pointer, as these can't be MMIO.
2940 // Also exclude MTE tag store instructions.
2941 if (MBBI->hasOrderedMemoryRef() &&
2942 AArch64InstrInfo::getLdStBaseOp(MI).getReg() != AArch64::SP &&
2943 !isTagStore(MI) && MI.getOpcode() != AArch64::STGPi)
2944 return false;
2945
2946 // Look forward to try to form a post-index instruction. For example,
2947 // ldr x0, [x20]
2948 // add x20, x20, #32
2949 // merged into:
2950 // ldr x0, [x20], #32
2951 Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
2952 if (Update != E) {
2953 // Merge the update into the ld/st.
2954 if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
2955 /*IsPreIdx=*/false,
2956 /*MergeEither=*/false)) {
2957 MBBI = *NextI;
2958 return true;
2959 }
2960 }
2961
2962 // Don't know how to handle unscaled pre/post-index versions below, so bail.
2963 if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
2964 return false;
2965
2966 // Look back to try to find a pre-index instruction. For example,
2967 // add x0, x0, #8
2968 // ldr x1, [x0]
2969 // merged into:
2970 // ldr x1, [x0, #8]!
2971 bool MergeEither;
2972 Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit, MergeEither);
2973 if (Update != E) {
2974 // Merge the update into the ld/st.
2975 if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/true,
2976 /*IsPreIdx=*/true, MergeEither)) {
2977 MBBI = *NextI;
2978 return true;
2979 }
2980 }
2981
2982 // The immediate in the load/store is scaled by the size of the memory
2983 // operation. The immediate in the add we're looking for,
2984 // however, is not, so adjust here.
2985 int UnscaledOffset =
2987
2988 // Look forward to try to find a pre-index instruction. For example,
2989 // ldr x1, [x0, #64]
2990 // add x0, x0, #64
2991 // merged into:
2992 // ldr x1, [x0, #64]!
2993 Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
2994 if (Update != E) {
2995 // Merge the update into the ld/st.
2996 if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
2997 /*IsPreIdx=*/true,
2998 /*MergeEither=*/false)) {
2999 MBBI = *NextI;
3000 return true;
3001 }
3002 }
3003
3004 return false;
3005}
3006
3007bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI,
3008 int Scale) {
3009 MachineInstr &MI = *MBBI;
3010 MachineBasicBlock::iterator E = MI.getParent()->end();
3012
3013 // Don't know how to handle unscaled pre/post-index versions below, so bail.
3014 if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
3015 return false;
3016
3017 // Look back to try to find a const offset for index LdSt instruction. For
3018 // example,
3019 // mov x8, #LargeImm ; = a * (1<<12) + imm12
3020 // ldr x1, [x0, x8]
3021 // merged into:
3022 // add x8, x0, a * (1<<12)
3023 // ldr x1, [x8, imm12]
3024 unsigned Offset;
3025 Update = findMatchingConstOffsetBackward(MBBI, LdStConstLimit, Offset);
3026 if (Update != E && (Offset & (Scale - 1)) == 0) {
3027 // Merge the imm12 into the ld/st.
3028 MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale);
3029 return true;
3030 }
3031
3032 return false;
3033}
3034
3035// Map a GPR store opcode to its FPR equivalent at the same data width.
3036// Returns 0 if no mapping exists.
3037static unsigned getGPRToFPRStoreOpcode(unsigned GPRStoreOpc) {
3038 switch (GPRStoreOpc) {
3039 // Unsigned immediate.
3040 case AArch64::STRBBui:
3041 return AArch64::STRBui;
3042 case AArch64::STRHHui:
3043 return AArch64::STRHui;
3044 case AArch64::STRWui:
3045 return AArch64::STRSui;
3046 case AArch64::STRXui:
3047 return AArch64::STRDui;
3048 // Unscaled immediate.
3049 case AArch64::STURBBi:
3050 return AArch64::STURBi;
3051 case AArch64::STURHHi:
3052 return AArch64::STURHi;
3053 case AArch64::STURWi:
3054 return AArch64::STURSi;
3055 case AArch64::STURXi:
3056 return AArch64::STURDi;
3057 // Register offset.
3058 case AArch64::STRBBroW:
3059 return AArch64::STRBroW;
3060 case AArch64::STRBBroX:
3061 return AArch64::STRBroX;
3062 case AArch64::STRHHroW:
3063 return AArch64::STRHroW;
3064 case AArch64::STRHHroX:
3065 return AArch64::STRHroX;
3066 case AArch64::STRWroW:
3067 return AArch64::STRSroW;
3068 case AArch64::STRWroX:
3069 return AArch64::STRSroX;
3070 case AArch64::STRXroW:
3071 return AArch64::STRDroW;
3072 case AArch64::STRXroX:
3073 return AArch64::STRDroX;
3074 default:
3075 return 0;
3076 }
3077}
3078
3079// Given a UMOV-lane-0 opcode, return the sub-register index to extract from
3080// the vector register, or 0 if the opcode is not a supported UMOV.
3081static unsigned getUMOVSubRegIdx(unsigned UMOVOpc) {
3082 switch (UMOVOpc) {
3083 case AArch64::UMOVvi8_idx0:
3084 return AArch64::bsub;
3085 case AArch64::UMOVvi16_idx0:
3086 return AArch64::hsub;
3087 case AArch64::UMOVvi32_idx0:
3088 return AArch64::ssub;
3089 case AArch64::UMOVvi64_idx0:
3090 return AArch64::dsub;
3091 default:
3092 return 0;
3093 }
3094}
3095
3096bool AArch64LoadStoreOpt::tryToReplaceUMOVStore(
3098 MachineInstr &StoreMI = *MBBI;
3099
3100 unsigned FPRStoreOpc = getGPRToFPRStoreOpcode(StoreMI.getOpcode());
3101 if (!FPRStoreOpc)
3102 return false;
3103
3104 if (StoreMI.hasOrderedMemoryRef())
3105 return false;
3106
3107 MachineBasicBlock *MBB = StoreMI.getParent();
3108 MCPhysReg StoreValReg = StoreMI.getOperand(0).getReg();
3109
3110 if (!StoreMI.getOperand(0).isKill())
3111 return false;
3112
3113 // Bail out if the store uses the value register elsewhere (e.g., as the base
3114 // address in `str w8, [x8, #0]`).
3115 for (unsigned I = 1, E = StoreMI.getNumExplicitOperands(); I < E; ++I)
3116 if (StoreMI.getOperand(I).isReg() &&
3117 TRI->regsOverlap(StoreMI.getOperand(I).getReg(), StoreValReg))
3118 return false;
3119
3120 // Scan backward to find the UMOV that defines the store's value register.
3121 MachineInstr *UMOVMI = nullptr;
3123 unsigned SubRegIdx = 0;
3124 unsigned Count = 0;
3125 for (auto It = MBBI; It != B;) {
3126 MachineInstr &MI = *--It;
3127 if (MI.isDebugInstr())
3128 continue;
3129 if (++Count > UMOVFoldLimit)
3130 return false;
3131 if (MI.readsRegister(StoreValReg, TRI))
3132 return false;
3133 if (MI.modifiesRegister(StoreValReg, TRI)) {
3134 SubRegIdx = getUMOVSubRegIdx(MI.getOpcode());
3135 if (!SubRegIdx)
3136 return false;
3137 UMOVMI = &MI;
3138 break;
3139 }
3140 }
3141 if (!UMOVMI)
3142 return false;
3143 MCPhysReg VecReg = UMOVMI->getOperand(1).getReg();
3144
3145 // Check that no instruction between UMOV and store clobbers the vector
3146 // register. Also track whether VecReg is killed anywhere from the UMOV
3147 // (inclusive) through the intervening instructions -- we need this to decide
3148 // whether the FPR sub-register can be marked killed on the new store.
3149 bool VecRegKilled = UMOVMI->killsRegister(VecReg, TRI);
3150 for (auto It = std::next(UMOVMI->getIterator()); It != MBBI; ++It) {
3151 if (It->modifiesRegister(VecReg, TRI))
3152 return false;
3153 if (!VecRegKilled && It->killsRegister(VecReg, TRI))
3154 VecRegKilled = true;
3155 }
3156
3157 // Safe to proceed. Clear kill flags on the vector register between UMOV and
3158 // the new store so the FPR sub-register stays live.
3159 UMOVMI->clearRegisterKills(VecReg, TRI);
3160 for (auto It = std::next(UMOVMI->getIterator()); It != MBBI; ++It)
3161 It->clearRegisterKills(VecReg, TRI);
3162
3163 LLVM_DEBUG(dbgs() << "Folding UMOV + store: " << *UMOVMI << " + "
3164 << StoreMI);
3165
3166 MCPhysReg FPRReg = TRI->getSubReg(VecReg, SubRegIdx);
3167 auto MIB = BuildMI(*MBB, MBBI, StoreMI.getDebugLoc(), TII->get(FPRStoreOpc))
3168 .addReg(FPRReg, getKillRegState(VecRegKilled));
3169 for (unsigned I = 1, E = StoreMI.getNumExplicitOperands(); I < E; ++I)
3170 MIB.add(StoreMI.getOperand(I));
3171 MIB.setMemRefs(StoreMI.memoperands());
3172
3173 MBBI = MBB->erase(MBBI);
3174 UMOVMI->eraseFromParent();
3175
3176 ++NumUMOVFoldedToFPRStore;
3177 return true;
3178}
3179
3180bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
3181 bool EnableNarrowZeroStOpt) {
3182 AArch64FunctionInfo &AFI = *MBB.getParent()->getInfo<AArch64FunctionInfo>();
3183
3184 bool Modified = false;
3185 // Six transformations to do here:
3186 // 1) Find loads that directly read from stores and promote them by
3187 // replacing with mov instructions. If the store is wider than the load,
3188 // the load will be replaced with a bitfield extract.
3189 // e.g.,
3190 // str w1, [x0, #4]
3191 // ldrh w2, [x0, #6]
3192 // ; becomes
3193 // str w1, [x0, #4]
3194 // lsr w2, w1, #16
3196 MBBI != E;) {
3197 if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
3198 Modified = true;
3199 else
3200 ++MBBI;
3201 }
3202 // 2) Merge adjacent zero stores into a wider store.
3203 // e.g.,
3204 // strh wzr, [x0]
3205 // strh wzr, [x0, #2]
3206 // ; becomes
3207 // str wzr, [x0]
3208 // e.g.,
3209 // str wzr, [x0]
3210 // str wzr, [x0, #4]
3211 // ; becomes
3212 // str xzr, [x0]
3213 if (EnableNarrowZeroStOpt)
3215 MBBI != E;) {
3216 if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
3217 Modified = true;
3218 else
3219 ++MBBI;
3220 }
3221 // 3) Find loads and stores that can be merged into a single load or store
3222 // pair instruction.
3223 // When compiling for SVE 128, also try to combine SVE fill/spill
3224 // instructions into LDP/STP.
3225 // e.g.,
3226 // ldr x0, [x2]
3227 // ldr x1, [x2, #8]
3228 // ; becomes
3229 // ldp x0, x1, [x2]
3230 // e.g.,
3231 // ldr z0, [x2]
3232 // ldr z1, [x2, #1, mul vl]
3233 // ; becomes
3234 // ldp q0, q1, [x2]
3235
3237 DefinedInBB.clear();
3238 DefinedInBB.addLiveIns(MBB);
3239 }
3240
3242 MBBI != E;) {
3243 // Track currently live registers up to this point, to help with
3244 // searching for a rename register on demand.
3245 updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
3246 if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
3247 Modified = true;
3248 else
3249 ++MBBI;
3250 }
3251 // 4) Find base register updates that can be merged into the load or store
3252 // as a base-reg writeback.
3253 // e.g.,
3254 // ldr x0, [x2]
3255 // add x2, x2, #4
3256 // ; becomes
3257 // ldr x0, [x2], #4
3259 MBBI != E;) {
3260 if (isMergeableLdStUpdate(*MBBI, AFI) && tryToMergeLdStUpdate(MBBI))
3261 Modified = true;
3262 else
3263 ++MBBI;
3264 }
3265
3266 // 5) Find a register assigned with a const value that can be combined with
3267 // into the load or store. e.g.,
3268 // mov x8, #LargeImm ; = a * (1<<12) + imm12
3269 // ldr x1, [x0, x8]
3270 // ; becomes
3271 // add x8, x0, a * (1<<12)
3272 // ldr x1, [x8, imm12]
3274 MBBI != E;) {
3275 int Scale;
3276 if (isMergeableIndexLdSt(*MBBI, Scale) && tryToMergeIndexLdSt(MBBI, Scale))
3277 Modified = true;
3278 else
3279 ++MBBI;
3280 }
3281
3282 // 6) Replace UMOV (lane 0) + GPR store with a direct FPR sub-register store.
3283 // e.g.,
3284 // umov w8, v0.h[0]
3285 // strh w8, [x0]
3286 // ; becomes
3287 // str h0, [x0]
3289 MBBI != E;) {
3290 if (tryToReplaceUMOVStore(MBBI))
3291 Modified = true;
3292 else
3293 ++MBBI;
3294 }
3295
3296 return Modified;
3297}
3298
3299bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
3300 Subtarget = &Fn.getSubtarget<AArch64Subtarget>();
3301 TII = Subtarget->getInstrInfo();
3302 TRI = Subtarget->getRegisterInfo();
3303
3304 // Resize the modified and used register unit trackers. We do this once
3305 // per function and then clear the register units each time we optimize a load
3306 // or store.
3307 ModifiedRegUnits.init(*TRI);
3308 UsedRegUnits.init(*TRI);
3309 DefinedInBB.init(*TRI);
3310
3311 bool Modified = false;
3312 bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
3313 for (auto &MBB : Fn) {
3314 auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
3315 Modified |= M;
3316 }
3317
3318 return Modified;
3319}
3320
3321// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads and
3322// stores near one another? Note: The pre-RA instruction scheduler already has
3323// hooks to try and schedule pairable loads/stores together to improve pairing
3324// opportunities. Thus, pre-RA pairing pass may not be worth the effort.
3325
3326// FIXME: When pairing store instructions it's very possible for this pass to
3327// hoist a store with a KILL marker above another use (without a KILL marker).
3328// The resulting IR is invalid, but nothing uses the KILL markers after this
3329// pass, so it's never caused a problem in practice.
3330
3331bool AArch64LoadStoreOptLegacy::runOnMachineFunction(MachineFunction &MF) {
3332 if (skipFunction(MF.getFunction()))
3333 return false;
3334 AArch64LoadStoreOpt Impl;
3335 Impl.AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
3336 return Impl.runOnMachineFunction(MF);
3337}
3338
3339/// createAArch64LoadStoreOptimizationPass - returns an instance of the
3340/// load / store optimization pass.
3342 return new AArch64LoadStoreOptLegacy();
3343}
3344
3348 AArch64LoadStoreOpt Impl;
3350 .getManager()
3351 .getResult<AAManager>(MF.getFunction());
3352 bool Changed = Impl.runOnMachineFunction(MF);
3353 if (!Changed)
3354 return PreservedAnalyses::all();
3357 return PA;
3358}
#define Success
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static cl::opt< bool > EnableRenaming("aarch64-load-store-renaming", cl::init(true), cl::Hidden)
static MachineOperand & getLdStRegOp(MachineInstr &MI, unsigned PairedRegOp=0)
static bool isPromotableLoadFromStore(MachineInstr &MI)
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale, int &MinOffset, int &MaxOffset)
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride)
static unsigned getMatchingPairOpcode(unsigned Opc)
static unsigned getGPRToFPRStoreOpcode(unsigned GPRStoreOpc)
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, LdStPairFlags &Flags, const AArch64InstrInfo *TII)
static std::optional< MCPhysReg > tryToFindRegisterToRename(const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static bool needsWinCFI(const MachineFunction *MF)
static bool canRenameUntilSecondLoad(MachineInstr &FirstLoad, MachineInstr &SecondLoad, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static std::optional< MCPhysReg > findRenameRegForSameLdStRegPair(std::optional< bool > MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
static cl::opt< unsigned > LdStLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden)
static bool canRenameMOP(const MachineOperand &MOP, const TargetRegisterInfo *TRI)
static bool isRewritableImplicitDef(const MachineOperand &MO)
static unsigned getPreIndexedOpcode(unsigned Opc)
#define AARCH64_LOAD_STORE_OPT_NAME
static void addDebugSubstitutionsToTable(MachineFunction *MF, unsigned InstrNumToSet, MachineInstr &OriginalInstr, MachineInstr &MergedInstr)
This function will add a new entry into the debugValueSubstitutions table when two instruction have b...
static cl::opt< unsigned > UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden)
static bool isPromotableZeroStoreInst(MachineInstr &MI)
static unsigned getMatchingWideOpcode(unsigned Opc)
static unsigned getMatchingNonSExtOpcode(unsigned Opc, bool *IsValidLdStrOpc=nullptr)
static MachineBasicBlock::iterator maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI)
static bool isTagStore(const MachineInstr &MI)
static unsigned isMatchingStore(MachineInstr &LoadInst, MachineInstr &StoreInst)
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg, const TargetRegisterInfo *TRI, unsigned Limit, std::function< bool(MachineInstr &, bool)> &Fn)
static unsigned getPostIndexedOpcode(unsigned Opc)
static unsigned getUMOVSubRegIdx(unsigned UMOVOpc)
static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI)
static cl::opt< unsigned > LdStConstLimit("aarch64-load-store-const-scan-limit", cl::init(10), cl::Hidden)
static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst, MachineInstr &StoreInst, const AArch64InstrInfo *TII)
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI)
static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale)
static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units, const TargetRegisterInfo *TRI)
static bool canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static cl::opt< unsigned > UMOVFoldLimit("aarch64-umov-fold-scan-limit", cl::init(16), cl::Hidden)
static unsigned getBaseAddressOpcode(unsigned Opc)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t High
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operator of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operator of a load/store.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
const AArch64RegisterInfo * getRegisterInfo() const override
const AArch64InstrInfo * getInstrInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
unsigned getRedZoneSize(const Function &F) const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
static bool shouldExecute(CounterInfo &Counter)
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition Function.h:689
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
A set of register units used to track register liveness.
static void accumulateUsedDefed(const MachineInstr &MI, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI)
For a machine instruction MI, adds all register units used in UsedRegUnits and defined or clobbered i...
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
void removeReg(MCRegister Reg)
Removes all register units covered by physical register Reg.
LLVM_ABI void addLiveIns(const MachineBasicBlock &MBB)
Adds registers living into block MBB.
void clear()
Clears the set.
LLVM_ABI void accumulate(const MachineInstr &MI)
Adds all register units used, defined or clobbered in MI.
An instruction for reading from memory.
bool usesWindowsCFI() const
Definition MCAsmInfo.h:674
OpType getOperation() const
Definition MCDwarf.h:804
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution between one <instr,operand> value to a different, new value.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
LLVM_ABI bool mayAlias(BatchAAResults *AA, const MachineInstr &Other, bool UseTBAA) const
Returns true if this instruction's memory access aliases the memory access of Other.
unsigned peekDebugInstrNum() const
Examine the instruction number of this MachineInstr.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
LLVM_ABI void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)
Assign this MachineInstr's memory reference descriptor list.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
mop_range operands()
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
LLVM_ABI void dump() const
LLVM_ABI unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
LLVM_ABI void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo)
Clear all kill flags affecting Reg.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
MachineOperand class - Representation of each machine instruction operand.
void setImplicit(bool Val=true)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
LLVM_ABI bool isRenamable() const
isRenamable - Returns true if this register may be renamed, i.e.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
bool isEarlyClobber() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
void dump() const
Definition Pass.cpp:146
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
Wrapper class representing virtual and physical registers.
Definition Register.h:20
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
const MCAsmInfo & getMCAsmInfo() const
Return target specific asm information.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
self_iterator getIterator()
Definition ilist_node.h:123
Changed
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
constexpr double e
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr RegState getKillRegState(bool B)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< filter_iterator< ConstMIBundleOperands, bool(*)(const MachineOperand &)> > phys_regs_and_masks(const MachineInstr &MI)
Returns an iterator range over all physical register and mask operands for MI and bundled instruction...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
FunctionPass * createAArch64LoadStoreOptLegacyPass()
createAArch64LoadStoreOptimizationPass - returns an instance of the load / store optimization pass.
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.