AArch64LoadStoreOptimizer.cpp
1//===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass that performs load / store related peephole
10// optimizations. This pass should be run after register allocation.
11//
12// The pass runs after the PrologEpilogInserter where we emit the CFI
13// instructions. In order to preserve the correctness of the unwind information,
14// the pass should not change the order of any two instructions where one of
15// them has the FrameSetup/FrameDestroy flag, or, alternatively, it should
16// apply an ad-hoc fix to the unwind information.
17//
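// For example, adjacent loads such as:
//   ldr w0, [x2]
//   ldr w1, [x2, #4]
// are combined into a single paired access:
//   ldp w0, w1, [x2]
// and base-register increments are folded into pre-/post-indexed accesses.
//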
18//===----------------------------------------------------------------------===//
19
20#include "AArch64InstrInfo.h"
22#include "AArch64Subtarget.h"
25#include "llvm/ADT/Statistic.h"
26#include "llvm/ADT/StringRef.h"
37#include "llvm/IR/DebugLoc.h"
38#include "llvm/MC/MCAsmInfo.h"
39#include "llvm/MC/MCDwarf.h"
40#include "llvm/Pass.h"
42#include "llvm/Support/Debug.h"
46#include <cassert>
47#include <cstdint>
48#include <functional>
49#include <iterator>
50#include <limits>
51#include <optional>
52
53using namespace llvm;
54
55#define DEBUG_TYPE "aarch64-ldst-opt"
56
57STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
58STATISTIC(NumPostFolded, "Number of post-index updates folded");
59STATISTIC(NumPreFolded, "Number of pre-index updates folded");
60STATISTIC(NumUnscaledPairCreated,
61 "Number of load/store pairs generated from unscaled instructions");
62STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
63STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
64 STATISTIC(NumFailedAlignmentCheck, "Number of load/store pair transformations "
65 "that failed the alignment check");
66STATISTIC(NumConstOffsetFolded,
67 "Number of constant offsets of index addresses folded");
68
69DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
70 "Controls which pairs are considered for renaming");
71
72// The LdStLimit limits how far we search for load/store pairs.
73static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
74 cl::init(20), cl::Hidden);
75
76// The UpdateLimit limits how far we search for update instructions when we form
77// pre-/post-index instructions.
78static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
79 cl::Hidden);
80
81// The LdStConstLimit limits how far we search for const offset instructions
82// when we form index address load/store instructions.
83static cl::opt<unsigned> LdStConstLimit("aarch64-load-store-const-scan-limit",
84 cl::init(10), cl::Hidden);
85
86// Enable register renaming to find additional store pairing opportunities.
87static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
88 cl::init(true), cl::Hidden);
89
90#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
91
92namespace {
93
94using LdStPairFlags = struct LdStPairFlags {
95 // If a matching instruction is found, MergeForward is set to true if the
96 // merge is to remove the first instruction and replace the second with
97 // a pair-wise insn, and false if the reverse is true.
98 bool MergeForward = false;
99
100 // SExtIdx gives the index of the result of the load pair that must be
101 // extended. The value of SExtIdx assumes that the paired load produces the
102 // value in this order: (I, returned iterator), i.e., -1 means no value has
103 // to be extended, 0 means I, and 1 means the returned iterator.
104 int SExtIdx = -1;
105
106 // If not none, RenameReg can be used to rename the result register of the
107 // first store in a pair. Currently this only works when merging stores
108 // forward.
109 std::optional<MCPhysReg> RenameReg;
110
111 LdStPairFlags() = default;
112
113 void setMergeForward(bool V = true) { MergeForward = V; }
114 bool getMergeForward() const { return MergeForward; }
115
116 void setSExtIdx(int V) { SExtIdx = V; }
117 int getSExtIdx() const { return SExtIdx; }
118
119 void setRenameReg(MCPhysReg R) { RenameReg = R; }
120 void clearRenameReg() { RenameReg = std::nullopt; }
121 std::optional<MCPhysReg> getRenameReg() const { return RenameReg; }
122};
123
124struct AArch64LoadStoreOpt : public MachineFunctionPass {
125 static char ID;
126
127 AArch64LoadStoreOpt() : MachineFunctionPass(ID) {}
128
130 const AArch64InstrInfo *TII;
131 const TargetRegisterInfo *TRI;
132 const AArch64Subtarget *Subtarget;
133
134 // Track which register units have been modified and used.
135 LiveRegUnits ModifiedRegUnits, UsedRegUnits;
136 LiveRegUnits DefinedInBB;
137
138 void getAnalysisUsage(AnalysisUsage &AU) const override {
141 }
142
143 // Scan the instructions looking for a load/store that can be combined
144 // with the current instruction into a load/store pair.
145 // Return the matching instruction if one is found, else MBB->end().
147 LdStPairFlags &Flags,
148 unsigned Limit,
149 bool FindNarrowMerge);
150
151 // Scan the instructions looking for a store that writes to the address from
152 // which the current load instruction reads. Return true if one is found.
153 bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
155
156 // Merge the two narrow zero-store instructions indicated into a single wider store.
158 mergeNarrowZeroStores(MachineBasicBlock::iterator I,
160 const LdStPairFlags &Flags);
161
162 // Merge the two instructions indicated into a single pair-wise instruction.
164 mergePairedInsns(MachineBasicBlock::iterator I,
166 const LdStPairFlags &Flags);
167
168 // Promote the load that reads directly from the address stored to.
170 promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
172
173 // Scan the instruction list to find a base register update that can
174 // be combined with the current instruction (a load or store) using
175 // pre or post indexed addressing with writeback. Scan forwards.
177 findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
178 int UnscaledOffset, unsigned Limit);
179
180 // Scan the instruction list to find a register assigned with a const
181 // value that can be combined with the current instruction (a load or store)
182 // using base addressing with writeback. Scan backwards.
184 findMatchingConstOffsetBackward(MachineBasicBlock::iterator I, unsigned Limit,
185 unsigned &Offset);
186
187 // Scan the instruction list to find a base register update that can
188 // be combined with the current instruction (a load or store) using
189 // pre or post indexed addressing with writeback. Scan backwards.
190 // `MergeEither` is set to true if the combined instruction may be placed
191 // either at the location of the load/store instruction or at the location of
192 // the update instruction.
194 findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit,
195 bool &MergeEither);
196
197 // Find an instruction that updates the base register of the ld/st
198 // instruction.
199 bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
200 unsigned BaseReg, int Offset);
201
202 bool isMatchingMovConstInsn(MachineInstr &MemMI, MachineInstr &MI,
203 unsigned IndexReg, unsigned &Offset);
204
205 // Merge a pre- or post-index base register update into a ld/st instruction.
206 std::optional<MachineBasicBlock::iterator>
207 mergeUpdateInsn(MachineBasicBlock::iterator I,
208 MachineBasicBlock::iterator Update, bool IsForward,
209 bool IsPreIdx, bool MergeEither);
210
212 mergeConstOffsetInsn(MachineBasicBlock::iterator I,
213 MachineBasicBlock::iterator Update, unsigned Offset,
214 int Scale);
215
216 // Find and merge zero store instructions.
217 bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);
218
219 // Find and pair ldr/str instructions.
220 bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
221
222 // Find and promote load instructions which read directly from store.
223 bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
224
225 // Find and merge base register updates before or after a ld/st instruction.
226 bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);
227
228 // Find and merge an index ldr/st instruction into a base ld/st instruction.
229 bool tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, int Scale);
230
231 bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
232
233 bool runOnMachineFunction(MachineFunction &Fn) override;
234
235 MachineFunctionProperties getRequiredProperties() const override {
236 return MachineFunctionProperties().setNoVRegs();
237 }
238
239 StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
240};
241
242char AArch64LoadStoreOpt::ID = 0;
243
244} // end anonymous namespace
245
246INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
247 AARCH64_LOAD_STORE_OPT_NAME, false, false)
248
249static bool isNarrowStore(unsigned Opc) {
250 switch (Opc) {
251 default:
252 return false;
253 case AArch64::STRBBui:
254 case AArch64::STURBBi:
255 case AArch64::STRHHui:
256 case AArch64::STURHHi:
257 return true;
258 }
259}
260
261 // These instructions set the memory tag and either keep the memory contents
262 // unchanged or set them to zero, ignoring the address part of the source register.
263static bool isTagStore(const MachineInstr &MI) {
264 switch (MI.getOpcode()) {
265 default:
266 return false;
267 case AArch64::STGi:
268 case AArch64::STZGi:
269 case AArch64::ST2Gi:
270 case AArch64::STZ2Gi:
271 return true;
272 }
273}
274
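// Return the equivalent non-sign-extending opcode for the given load/store
// opcode (e.g. LDRSWui -> LDRWui), or the opcode itself if it is already a
// non-extending form this pass can handle. If IsValidLdStrOpc is provided, it
// is set to false for opcodes that cannot be merged or paired at all.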
275static unsigned getMatchingNonSExtOpcode(unsigned Opc,
276 bool *IsValidLdStrOpc = nullptr) {
277 if (IsValidLdStrOpc)
278 *IsValidLdStrOpc = true;
279 switch (Opc) {
280 default:
281 if (IsValidLdStrOpc)
282 *IsValidLdStrOpc = false;
283 return std::numeric_limits<unsigned>::max();
284 case AArch64::STRDui:
285 case AArch64::STURDi:
286 case AArch64::STRDpre:
287 case AArch64::STRQui:
288 case AArch64::STURQi:
289 case AArch64::STRQpre:
290 case AArch64::STRBBui:
291 case AArch64::STURBBi:
292 case AArch64::STRHHui:
293 case AArch64::STURHHi:
294 case AArch64::STRWui:
295 case AArch64::STRWpre:
296 case AArch64::STURWi:
297 case AArch64::STRXui:
298 case AArch64::STRXpre:
299 case AArch64::STURXi:
300 case AArch64::STR_ZXI:
301 case AArch64::LDRDui:
302 case AArch64::LDURDi:
303 case AArch64::LDRDpre:
304 case AArch64::LDRQui:
305 case AArch64::LDURQi:
306 case AArch64::LDRQpre:
307 case AArch64::LDRWui:
308 case AArch64::LDURWi:
309 case AArch64::LDRWpre:
310 case AArch64::LDRXui:
311 case AArch64::LDURXi:
312 case AArch64::LDRXpre:
313 case AArch64::STRSui:
314 case AArch64::STURSi:
315 case AArch64::STRSpre:
316 case AArch64::LDRSui:
317 case AArch64::LDURSi:
318 case AArch64::LDRSpre:
319 case AArch64::LDR_ZXI:
320 return Opc;
321 case AArch64::LDRSWui:
322 return AArch64::LDRWui;
323 case AArch64::LDURSWi:
324 return AArch64::LDURWi;
325 case AArch64::LDRSWpre:
326 return AArch64::LDRWpre;
327 }
328}
329
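// Return the store opcode that writes twice the width of the given narrow
// store opcode; used when merging two adjacent zero stores into a single
// wider store.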
330static unsigned getMatchingWideOpcode(unsigned Opc) {
331 switch (Opc) {
332 default:
333 llvm_unreachable("Opcode has no wide equivalent!");
334 case AArch64::STRBBui:
335 return AArch64::STRHHui;
336 case AArch64::STRHHui:
337 return AArch64::STRWui;
338 case AArch64::STURBBi:
339 return AArch64::STURHHi;
340 case AArch64::STURHHi:
341 return AArch64::STURWi;
342 case AArch64::STURWi:
343 return AArch64::STURXi;
344 case AArch64::STRWui:
345 return AArch64::STRXui;
346 }
347}
348
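// Return the load/store-pair (LDP/STP) opcode corresponding to the given
// single load/store opcode; scaled and unscaled single-register forms map to
// the same scaled pair opcode.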
349static unsigned getMatchingPairOpcode(unsigned Opc) {
350 switch (Opc) {
351 default:
352 llvm_unreachable("Opcode has no pairwise equivalent!");
353 case AArch64::STRSui:
354 case AArch64::STURSi:
355 return AArch64::STPSi;
356 case AArch64::STRSpre:
357 return AArch64::STPSpre;
358 case AArch64::STRDui:
359 case AArch64::STURDi:
360 return AArch64::STPDi;
361 case AArch64::STRDpre:
362 return AArch64::STPDpre;
363 case AArch64::STRQui:
364 case AArch64::STURQi:
365 case AArch64::STR_ZXI:
366 return AArch64::STPQi;
367 case AArch64::STRQpre:
368 return AArch64::STPQpre;
369 case AArch64::STRWui:
370 case AArch64::STURWi:
371 return AArch64::STPWi;
372 case AArch64::STRWpre:
373 return AArch64::STPWpre;
374 case AArch64::STRXui:
375 case AArch64::STURXi:
376 return AArch64::STPXi;
377 case AArch64::STRXpre:
378 return AArch64::STPXpre;
379 case AArch64::LDRSui:
380 case AArch64::LDURSi:
381 return AArch64::LDPSi;
382 case AArch64::LDRSpre:
383 return AArch64::LDPSpre;
384 case AArch64::LDRDui:
385 case AArch64::LDURDi:
386 return AArch64::LDPDi;
387 case AArch64::LDRDpre:
388 return AArch64::LDPDpre;
389 case AArch64::LDRQui:
390 case AArch64::LDURQi:
391 case AArch64::LDR_ZXI:
392 return AArch64::LDPQi;
393 case AArch64::LDRQpre:
394 return AArch64::LDPQpre;
395 case AArch64::LDRWui:
396 case AArch64::LDURWi:
397 return AArch64::LDPWi;
398 case AArch64::LDRWpre:
399 return AArch64::LDPWpre;
400 case AArch64::LDRXui:
401 case AArch64::LDURXi:
402 return AArch64::LDPXi;
403 case AArch64::LDRXpre:
404 return AArch64::LDPXpre;
405 case AArch64::LDRSWui:
406 case AArch64::LDURSWi:
407 return AArch64::LDPSWi;
408 case AArch64::LDRSWpre:
409 return AArch64::LDPSWpre;
410 }
411}
412
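// Return true if the store writes at least as many bytes as the load reads,
// using the same addressing form (both scaled or both unscaled).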
413 static bool isMatchingStore(MachineInstr &LoadInst,
414 MachineInstr &StoreInst) {
415 unsigned LdOpc = LoadInst.getOpcode();
416 unsigned StOpc = StoreInst.getOpcode();
417 switch (LdOpc) {
418 default:
419 llvm_unreachable("Unsupported load instruction!");
420 case AArch64::LDRBBui:
421 return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
422 StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
423 case AArch64::LDURBBi:
424 return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
425 StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
426 case AArch64::LDRHHui:
427 return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
428 StOpc == AArch64::STRXui;
429 case AArch64::LDURHHi:
430 return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
431 StOpc == AArch64::STURXi;
432 case AArch64::LDRWui:
433 return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
434 case AArch64::LDURWi:
435 return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
436 case AArch64::LDRXui:
437 return StOpc == AArch64::STRXui;
438 case AArch64::LDURXi:
439 return StOpc == AArch64::STURXi;
440 }
441}
442
443static unsigned getPreIndexedOpcode(unsigned Opc) {
444 // FIXME: We don't currently support creating pre-indexed loads/stores when
445 // the load or store is the unscaled version. If we decide to perform such an
446 // optimization in the future the cases for the unscaled loads/stores will
447 // need to be added here.
448 switch (Opc) {
449 default:
450 llvm_unreachable("Opcode has no pre-indexed equivalent!");
451 case AArch64::STRSui:
452 return AArch64::STRSpre;
453 case AArch64::STRDui:
454 return AArch64::STRDpre;
455 case AArch64::STRQui:
456 return AArch64::STRQpre;
457 case AArch64::STRBBui:
458 return AArch64::STRBBpre;
459 case AArch64::STRHHui:
460 return AArch64::STRHHpre;
461 case AArch64::STRWui:
462 return AArch64::STRWpre;
463 case AArch64::STRXui:
464 return AArch64::STRXpre;
465 case AArch64::LDRSui:
466 return AArch64::LDRSpre;
467 case AArch64::LDRDui:
468 return AArch64::LDRDpre;
469 case AArch64::LDRQui:
470 return AArch64::LDRQpre;
471 case AArch64::LDRBBui:
472 return AArch64::LDRBBpre;
473 case AArch64::LDRHHui:
474 return AArch64::LDRHHpre;
475 case AArch64::LDRWui:
476 return AArch64::LDRWpre;
477 case AArch64::LDRXui:
478 return AArch64::LDRXpre;
479 case AArch64::LDRSWui:
480 return AArch64::LDRSWpre;
481 case AArch64::LDPSi:
482 return AArch64::LDPSpre;
483 case AArch64::LDPSWi:
484 return AArch64::LDPSWpre;
485 case AArch64::LDPDi:
486 return AArch64::LDPDpre;
487 case AArch64::LDPQi:
488 return AArch64::LDPQpre;
489 case AArch64::LDPWi:
490 return AArch64::LDPWpre;
491 case AArch64::LDPXi:
492 return AArch64::LDPXpre;
493 case AArch64::STPSi:
494 return AArch64::STPSpre;
495 case AArch64::STPDi:
496 return AArch64::STPDpre;
497 case AArch64::STPQi:
498 return AArch64::STPQpre;
499 case AArch64::STPWi:
500 return AArch64::STPWpre;
501 case AArch64::STPXi:
502 return AArch64::STPXpre;
503 case AArch64::STGi:
504 return AArch64::STGPreIndex;
505 case AArch64::STZGi:
506 return AArch64::STZGPreIndex;
507 case AArch64::ST2Gi:
508 return AArch64::ST2GPreIndex;
509 case AArch64::STZ2Gi:
510 return AArch64::STZ2GPreIndex;
511 case AArch64::STGPi:
512 return AArch64::STGPpre;
513 }
514}
515
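// Return the unsigned-immediate (base + scaled offset) load opcode
// corresponding to the given register-offset (base + index register) load
// opcode.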
516static unsigned getBaseAddressOpcode(unsigned Opc) {
517 // TODO: Add more index address stores.
518 switch (Opc) {
519 default:
520 llvm_unreachable("Opcode has no base address equivalent!");
521 case AArch64::LDRBroX:
522 return AArch64::LDRBui;
523 case AArch64::LDRBBroX:
524 return AArch64::LDRBBui;
525 case AArch64::LDRSBXroX:
526 return AArch64::LDRSBXui;
527 case AArch64::LDRSBWroX:
528 return AArch64::LDRSBWui;
529 case AArch64::LDRHroX:
530 return AArch64::LDRHui;
531 case AArch64::LDRHHroX:
532 return AArch64::LDRHHui;
533 case AArch64::LDRSHXroX:
534 return AArch64::LDRSHXui;
535 case AArch64::LDRSHWroX:
536 return AArch64::LDRSHWui;
537 case AArch64::LDRWroX:
538 return AArch64::LDRWui;
539 case AArch64::LDRSroX:
540 return AArch64::LDRSui;
541 case AArch64::LDRSWroX:
542 return AArch64::LDRSWui;
543 case AArch64::LDRDroX:
544 return AArch64::LDRDui;
545 case AArch64::LDRXroX:
546 return AArch64::LDRXui;
547 case AArch64::LDRQroX:
548 return AArch64::LDRQui;
549 }
550}
551
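// Return the post-indexed (writeback) equivalent of the given load/store,
// load/store-pair, or tag-store opcode.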
552static unsigned getPostIndexedOpcode(unsigned Opc) {
553 switch (Opc) {
554 default:
555 llvm_unreachable("Opcode has no post-indexed equivalent!");
556 case AArch64::STRSui:
557 case AArch64::STURSi:
558 return AArch64::STRSpost;
559 case AArch64::STRDui:
560 case AArch64::STURDi:
561 return AArch64::STRDpost;
562 case AArch64::STRQui:
563 case AArch64::STURQi:
564 return AArch64::STRQpost;
565 case AArch64::STRBBui:
566 return AArch64::STRBBpost;
567 case AArch64::STRHHui:
568 return AArch64::STRHHpost;
569 case AArch64::STRWui:
570 case AArch64::STURWi:
571 return AArch64::STRWpost;
572 case AArch64::STRXui:
573 case AArch64::STURXi:
574 return AArch64::STRXpost;
575 case AArch64::LDRSui:
576 case AArch64::LDURSi:
577 return AArch64::LDRSpost;
578 case AArch64::LDRDui:
579 case AArch64::LDURDi:
580 return AArch64::LDRDpost;
581 case AArch64::LDRQui:
582 case AArch64::LDURQi:
583 return AArch64::LDRQpost;
584 case AArch64::LDRBBui:
585 return AArch64::LDRBBpost;
586 case AArch64::LDRHHui:
587 return AArch64::LDRHHpost;
588 case AArch64::LDRWui:
589 case AArch64::LDURWi:
590 return AArch64::LDRWpost;
591 case AArch64::LDRXui:
592 case AArch64::LDURXi:
593 return AArch64::LDRXpost;
594 case AArch64::LDRSWui:
595 return AArch64::LDRSWpost;
596 case AArch64::LDPSi:
597 return AArch64::LDPSpost;
598 case AArch64::LDPSWi:
599 return AArch64::LDPSWpost;
600 case AArch64::LDPDi:
601 return AArch64::LDPDpost;
602 case AArch64::LDPQi:
603 return AArch64::LDPQpost;
604 case AArch64::LDPWi:
605 return AArch64::LDPWpost;
606 case AArch64::LDPXi:
607 return AArch64::LDPXpost;
608 case AArch64::STPSi:
609 return AArch64::STPSpost;
610 case AArch64::STPDi:
611 return AArch64::STPDpost;
612 case AArch64::STPQi:
613 return AArch64::STPQpost;
614 case AArch64::STPWi:
615 return AArch64::STPWpost;
616 case AArch64::STPXi:
617 return AArch64::STPXpost;
618 case AArch64::STGi:
619 return AArch64::STGPostIndex;
620 case AArch64::STZGi:
621 return AArch64::STZGPostIndex;
622 case AArch64::ST2Gi:
623 return AArch64::ST2GPostIndex;
624 case AArch64::STZ2Gi:
625 return AArch64::STZ2GPostIndex;
626 case AArch64::STGPi:
627 return AArch64::STGPpost;
628 }
629}
630
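// Return true if FirstMI is a pre-indexed load/store and MI is the matching
// non-writeback (scaled or unscaled) load/store of the same width, so that
// the two can be combined into a pre-indexed pair.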
631 static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {
632
633 unsigned OpcA = FirstMI.getOpcode();
634 unsigned OpcB = MI.getOpcode();
635
636 switch (OpcA) {
637 default:
638 return false;
639 case AArch64::STRSpre:
640 return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
641 case AArch64::STRDpre:
642 return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
643 case AArch64::STRQpre:
644 return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
645 case AArch64::STRWpre:
646 return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
647 case AArch64::STRXpre:
648 return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
649 case AArch64::LDRSpre:
650 return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
651 case AArch64::LDRDpre:
652 return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
653 case AArch64::LDRQpre:
654 return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
655 case AArch64::LDRWpre:
656 return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
657 case AArch64::LDRXpre:
658 return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
659 case AArch64::LDRSWpre:
660 return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);
661 }
662}
663
664// Returns the scale and offset range of pre/post indexed variants of MI.
665static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
666 int &MinOffset, int &MaxOffset) {
667 bool IsPaired = AArch64InstrInfo::isPairedLdSt(MI);
668 bool IsTagStore = isTagStore(MI);
669 // ST*G and all paired ldst have the same scale in pre/post-indexed variants
670 // as in the "unsigned offset" variant.
671 // All other pre/post indexed ldst instructions are unscaled.
672 Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;
673
674 if (IsPaired) {
675 MinOffset = -64;
676 MaxOffset = 63;
677 } else {
678 MinOffset = -256;
679 MaxOffset = 255;
680 }
681}
682
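// Return the data register operand (Rt, or Rt2 when PairedRegOp is 1) of a
// load/store. Pre-indexed forms define the updated base register first, so
// the data register index is shifted by one for them.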
683 static MachineOperand &getLdStRegOp(MachineInstr &MI,
684 unsigned PairedRegOp = 0) {
685 assert(PairedRegOp < 2 && "Unexpected register operand idx.");
686 bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
687 if (IsPreLdSt)
688 PairedRegOp += 1;
689 unsigned Idx =
690 AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
691 return MI.getOperand(Idx);
692}
693
694 static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
695 MachineInstr &StoreInst,
696 const AArch64InstrInfo *TII) {
697 assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
698 int LoadSize = TII->getMemScale(LoadInst);
699 int StoreSize = TII->getMemScale(StoreInst);
700 int UnscaledStOffset =
701 TII->hasUnscaledLdStOffset(StoreInst)
702 ? AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm()
703 : AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm() * StoreSize;
704 int UnscaledLdOffset =
705 TII->hasUnscaledLdStOffset(LoadInst)
706 ? AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm()
707 : AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm() * LoadSize;
708 return (UnscaledStOffset <= UnscaledLdOffset) &&
709 (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
710}
711
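// Return true if MI is a 32-bit or narrower store of WZR, i.e. a zero store
// that can be merged with an adjacent zero store into a wider one.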
712 static bool isPromotableZeroStoreInst(MachineInstr &MI) {
713 unsigned Opc = MI.getOpcode();
714 return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
715 isNarrowStore(Opc)) &&
716 getLdStRegOp(MI).getReg() == AArch64::WZR;
717}
718
719 static bool isPromotableLoadFromStore(MachineInstr &MI) {
720 switch (MI.getOpcode()) {
721 default:
722 return false;
723 // Scaled instructions.
724 case AArch64::LDRBBui:
725 case AArch64::LDRHHui:
726 case AArch64::LDRWui:
727 case AArch64::LDRXui:
728 // Unscaled instructions.
729 case AArch64::LDURBBi:
730 case AArch64::LDURHHi:
731 case AArch64::LDURWi:
732 case AArch64::LDURXi:
733 return true;
734 }
735}
736
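// Return true if MI is a simple reg+imm load/store (or tag store) whose base
// register update could be folded into a pre- or post-indexed form.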
737 static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI) {
738 unsigned Opc = MI.getOpcode();
739 switch (Opc) {
740 default:
741 return false;
742 // Scaled instructions.
743 case AArch64::STRSui:
744 case AArch64::STRDui:
745 case AArch64::STRQui:
746 case AArch64::STRXui:
747 case AArch64::STRWui:
748 case AArch64::STRHHui:
749 case AArch64::STRBBui:
750 case AArch64::LDRSui:
751 case AArch64::LDRDui:
752 case AArch64::LDRQui:
753 case AArch64::LDRXui:
754 case AArch64::LDRWui:
755 case AArch64::LDRHHui:
756 case AArch64::LDRBBui:
757 case AArch64::STGi:
758 case AArch64::STZGi:
759 case AArch64::ST2Gi:
760 case AArch64::STZ2Gi:
761 case AArch64::STGPi:
762 // Unscaled instructions.
763 case AArch64::STURSi:
764 case AArch64::STURDi:
765 case AArch64::STURQi:
766 case AArch64::STURWi:
767 case AArch64::STURXi:
768 case AArch64::LDURSi:
769 case AArch64::LDURDi:
770 case AArch64::LDURQi:
771 case AArch64::LDURWi:
772 case AArch64::LDURXi:
773 // Paired instructions.
774 case AArch64::LDPSi:
775 case AArch64::LDPSWi:
776 case AArch64::LDPDi:
777 case AArch64::LDPQi:
778 case AArch64::LDPWi:
779 case AArch64::LDPXi:
780 case AArch64::STPSi:
781 case AArch64::STPDi:
782 case AArch64::STPQi:
783 case AArch64::STPWi:
784 case AArch64::STPXi:
785 // Make sure this is a reg+imm (as opposed to an address reloc).
786 if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
787 return false;
788
789 // When using stack tagging, simple sp+imm loads and stores are not
790 // tag-checked, but pre- and post-indexed versions of them are, so we can't
791 // replace the former with the latter. This transformation would be valid
792 // if the load/store accesses an untagged stack slot, but we don't have
793 // that information available after frame indices have been eliminated.
794 if (AFI.isMTETagged() &&
795 AArch64InstrInfo::getLdStBaseOp(MI).getReg() == AArch64::SP)
796 return false;
797
798 return true;
799 }
800}
801
802// Make sure this is a reg+reg Ld/St
803static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
804 unsigned Opc = MI.getOpcode();
805 switch (Opc) {
806 default:
807 return false;
808 // Scaled instructions.
809 // TODO: Add more index address stores.
810 case AArch64::LDRBroX:
811 case AArch64::LDRBBroX:
812 case AArch64::LDRSBXroX:
813 case AArch64::LDRSBWroX:
814 Scale = 1;
815 return true;
816 case AArch64::LDRHroX:
817 case AArch64::LDRHHroX:
818 case AArch64::LDRSHXroX:
819 case AArch64::LDRSHWroX:
820 Scale = 2;
821 return true;
822 case AArch64::LDRWroX:
823 case AArch64::LDRSroX:
824 case AArch64::LDRSWroX:
825 Scale = 4;
826 return true;
827 case AArch64::LDRDroX:
828 case AArch64::LDRXroX:
829 Scale = 8;
830 return true;
831 case AArch64::LDRQroX:
832 Scale = 16;
833 return true;
834 }
835}
836
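// Return true for opcodes (currently ORRWrs and ADDWri) whose implicit-def is
// known to correspond to the result register, so it can be rewritten when the
// result register is renamed.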
837static bool isRewritableImplicitDef(unsigned Opc) {
838 switch (Opc) {
839 default:
840 return false;
841 case AArch64::ORRWrs:
842 case AArch64::ADDWri:
843 return true;
844 }
845}
846
848AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
850 const LdStPairFlags &Flags) {
851 assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
852 "Expected promotable zero stores.");
853
854 MachineBasicBlock::iterator E = I->getParent()->end();
855 MachineBasicBlock::iterator NextI = next_nodbg(I, E);
856 // If NextI is the second of the two instructions to be merged, we need
857 // to skip one further. Either way, the merge will invalidate the iterator,
858 // and we don't need to scan the new instruction, as it's a pairwise
859 // instruction, which we're not considering for further action anyway.
860 if (NextI == MergeMI)
861 NextI = next_nodbg(NextI, E);
862
863 unsigned Opc = I->getOpcode();
864 unsigned MergeMIOpc = MergeMI->getOpcode();
865 bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
866 bool IsMergedMIScaled = !TII->hasUnscaledLdStOffset(MergeMIOpc);
867 int OffsetStride = IsScaled ? TII->getMemScale(*I) : 1;
868 int MergeMIOffsetStride = IsMergedMIScaled ? TII->getMemScale(*MergeMI) : 1;
869
870 bool MergeForward = Flags.getMergeForward();
871 // Insert our new paired instruction after whichever of the paired
872 // instructions MergeForward indicates.
873 MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
874 // MergeForward also determines from which instruction we copy the base
875 // register operand, so that we get flags compatible with the input code.
876 const MachineOperand &BaseRegOp =
877 MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI)
878 : AArch64InstrInfo::getLdStBaseOp(*I);
879
880 // Which register is Rt and which is Rt2 depends on the offset order.
881 int64_t IOffsetInBytes =
882 AArch64InstrInfo::getLdStOffsetOp(*I).getImm() * OffsetStride;
883 int64_t MIOffsetInBytes =
884 AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() *
885 MergeMIOffsetStride;
886 // Select final offset based on the offset order.
887 int64_t OffsetImm;
888 if (IOffsetInBytes > MIOffsetInBytes)
889 OffsetImm = MIOffsetInBytes;
890 else
891 OffsetImm = IOffsetInBytes;
892
893 int NewOpcode = getMatchingWideOpcode(Opc);
894 // Adjust final offset on scaled stores because the new instruction
895 // has a different scale.
896 if (!TII->hasUnscaledLdStOffset(NewOpcode)) {
897 int NewOffsetStride = TII->getMemScale(NewOpcode);
898 assert(((OffsetImm % NewOffsetStride) == 0) &&
899 "Offset should be a multiple of the store memory scale");
900 OffsetImm = OffsetImm / NewOffsetStride;
901 }
902
903 // Construct the new instruction.
904 DebugLoc DL = I->getDebugLoc();
905 MachineBasicBlock *MBB = I->getParent();
906 MachineInstrBuilder MIB;
907 MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(NewOpcode))
908 .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
909 .add(BaseRegOp)
910 .addImm(OffsetImm)
911 .cloneMergedMemRefs({&*I, &*MergeMI})
912 .setMIFlags(I->mergeFlagsWith(*MergeMI));
913 (void)MIB;
914
915 LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n ");
916 LLVM_DEBUG(I->print(dbgs()));
917 LLVM_DEBUG(dbgs() << " ");
918 LLVM_DEBUG(MergeMI->print(dbgs()));
919 LLVM_DEBUG(dbgs() << " with instruction:\n ");
920 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
921 LLVM_DEBUG(dbgs() << "\n");
922
923 // Erase the old instructions.
924 I->eraseFromParent();
925 MergeMI->eraseFromParent();
926 return NextI;
927}
928
929// Apply Fn to all instructions between MI and the beginning of the block, until
930 // a def for DefReg is reached. Returns true iff Fn returns true for all
931 // visited instructions. Stops after visiting Limit instructions.
932 static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
933 const TargetRegisterInfo *TRI, unsigned Limit,
934 std::function<bool(MachineInstr &, bool)> &Fn) {
935 auto MBB = MI.getParent();
936 for (MachineInstr &I :
937 instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
938 if (!Limit)
939 return false;
940 --Limit;
941
942 bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
943 return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
944 TRI->regsOverlap(MOP.getReg(), DefReg);
945 });
946 if (!Fn(I, isDef))
947 return false;
948 if (isDef)
949 break;
950 }
951 return true;
952}
953
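// Update the given register units to reflect MI: first remove the units of
// registers MI kills, then add the units of all remaining register operands.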
955 const TargetRegisterInfo *TRI) {
956
957 for (const MachineOperand &MOP : phys_regs_and_masks(MI))
958 if (MOP.isReg() && MOP.isKill())
959 Units.removeReg(MOP.getReg());
960
961 for (const MachineOperand &MOP : phys_regs_and_masks(MI))
962 if (MOP.isReg() && !MOP.isKill())
963 Units.addReg(MOP.getReg());
964}
965
966/// This function will add a new entry into the debugValueSubstitutions table
967/// when two instructions have been merged into a new one represented by \p
968/// MergedInstr.
969 static void addDebugSubstitutionsToTable(MachineFunction *MF,
970 unsigned InstrNumToSet,
971 MachineInstr &OriginalInstr,
972 MachineInstr &MergedInstr) {
973
974 // Figure out the Operand Index of the destination register of the
975 // OriginalInstr in the new MergedInstr.
976 auto Reg = OriginalInstr.getOperand(0).getReg();
977 unsigned OperandNo = 0;
978 bool RegFound = false;
979 for (const auto Op : MergedInstr.operands()) {
980 if (Op.getReg() == Reg) {
981 RegFound = true;
982 break;
983 }
984 OperandNo++;
985 }
986
987 if (RegFound)
988 MF->makeDebugValueSubstitution({OriginalInstr.peekDebugInstrNum(), 0},
989 {InstrNumToSet, OperandNo});
990}
991
993AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
995 const LdStPairFlags &Flags) {
996 MachineBasicBlock::iterator E = I->getParent()->end();
997 MachineBasicBlock::iterator NextI = next_nodbg(I, E);
998 // If NextI is the second of the two instructions to be merged, we need
999 // to skip one further. Either way, the merge will invalidate the iterator,
1000 // and we don't need to scan the new instruction, as it's a pairwise
1001 // instruction, which we're not considering for further action anyway.
1002 if (NextI == Paired)
1003 NextI = next_nodbg(NextI, E);
1004
1005 int SExtIdx = Flags.getSExtIdx();
1006 unsigned Opc =
1007 SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
1008 bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
1009 int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;
1010
1011 bool MergeForward = Flags.getMergeForward();
1012
1013 std::optional<MCPhysReg> RenameReg = Flags.getRenameReg();
1014 if (RenameReg) {
1015 MCRegister RegToRename = getLdStRegOp(*I).getReg();
1016 DefinedInBB.addReg(*RenameReg);
1017
1018 // Return the sub/super register for RenameReg, matching the size of
1019 // OriginalReg.
1020 auto GetMatchingSubReg =
1021 [this, RenameReg](const TargetRegisterClass *C) -> MCPhysReg {
1022 for (MCPhysReg SubOrSuper :
1023 TRI->sub_and_superregs_inclusive(*RenameReg)) {
1024 if (C->contains(SubOrSuper))
1025 return SubOrSuper;
1026 }
1027 llvm_unreachable("Should have found matching sub or super register!");
1028 };
1029
1030 std::function<bool(MachineInstr &, bool)> UpdateMIs =
1031 [this, RegToRename, GetMatchingSubReg, MergeForward](MachineInstr &MI,
1032 bool IsDef) {
1033 if (IsDef) {
1034 bool SeenDef = false;
1035 for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
1036 MachineOperand &MOP = MI.getOperand(OpIdx);
1037 // Rename the first explicit definition and all implicit
1038 // definitions matching RegToRename.
1039 if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1040 (!MergeForward || !SeenDef ||
1041 (MOP.isDef() && MOP.isImplicit())) &&
1042 TRI->regsOverlap(MOP.getReg(), RegToRename)) {
1043 assert((MOP.isImplicit() ||
1044 (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
1045 "Need renamable operands");
1046 Register MatchingReg;
1047 if (const TargetRegisterClass *RC =
1048 MI.getRegClassConstraint(OpIdx, TII, TRI))
1049 MatchingReg = GetMatchingSubReg(RC);
1050 else {
1051 if (!isRewritableImplicitDef(MI.getOpcode()))
1052 continue;
1053 MatchingReg = GetMatchingSubReg(
1054 TRI->getMinimalPhysRegClass(MOP.getReg()));
1055 }
1056 MOP.setReg(MatchingReg);
1057 SeenDef = true;
1058 }
1059 }
1060 } else {
1061 for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
1062 MachineOperand &MOP = MI.getOperand(OpIdx);
1063 if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1064 TRI->regsOverlap(MOP.getReg(), RegToRename)) {
1065 assert((MOP.isImplicit() ||
1066 (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
1067 "Need renamable operands");
1068 Register MatchingReg;
1069 if (const TargetRegisterClass *RC =
1070 MI.getRegClassConstraint(OpIdx, TII, TRI))
1071 MatchingReg = GetMatchingSubReg(RC);
1072 else
1073 MatchingReg = GetMatchingSubReg(
1074 TRI->getMinimalPhysRegClass(MOP.getReg()));
1075 assert(MatchingReg != AArch64::NoRegister &&
1076 "Cannot find matching regs for renaming");
1077 MOP.setReg(MatchingReg);
1078 }
1079 }
1080 }
1081 LLVM_DEBUG(dbgs() << "Renamed " << MI);
1082 return true;
1083 };
1084 forAllMIsUntilDef(MergeForward ? *I : *Paired->getPrevNode(), RegToRename,
1085 TRI, UINT32_MAX, UpdateMIs);
1086
1087#if !defined(NDEBUG)
1088 // For forward merging store:
1089 // Make sure the register used for renaming is not used between the
1090 // paired instructions. That would trash the content before the new
1091 // paired instruction.
1092 MCPhysReg RegToCheck = *RenameReg;
1093 // For backward merging load:
1094 // Make sure the register being renamed is not used between the
1095 // paired instructions. That would trash the content after the new
1096 // paired instruction.
1097 if (!MergeForward)
1098 RegToCheck = RegToRename;
1099 for (auto &MI :
1100 iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>(
1101 MergeForward ? std::next(I) : I,
1102 MergeForward ? std::next(Paired) : Paired))
1103 assert(all_of(MI.operands(),
1104 [this, RegToCheck](const MachineOperand &MOP) {
1105 return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1106 MOP.isUndef() ||
1107 !TRI->regsOverlap(MOP.getReg(), RegToCheck);
1108 }) &&
1109 "Rename register used between paired instruction, trashing the "
1110 "content");
1111#endif
1112 }
1113
1114 // Insert our new paired instruction after whichever of the paired
1115 // instructions MergeForward indicates.
1116 MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
1117 // MergeForward also determines from which instruction we copy the base
1118 // register operand, so that we get flags compatible with the input code.
1119 const MachineOperand &BaseRegOp =
1120 MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired)
1121 : AArch64InstrInfo::getLdStBaseOp(*I);
1122
1123 int Offset = AArch64InstrInfo::getLdStOffsetOp(*I).getImm();
1124 int PairedOffset = AArch64InstrInfo::getLdStOffsetOp(*Paired).getImm();
1125 bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
1126 if (IsUnscaled != PairedIsUnscaled) {
1127 // We're trying to pair instructions that differ in how they are scaled. If
1128 // I is scaled then scale the offset of Paired accordingly. Otherwise, do
1129 // the opposite (i.e., make Paired's offset unscaled).
1130 int MemSize = TII->getMemScale(*Paired);
1131 if (PairedIsUnscaled) {
1132 // If the unscaled offset isn't a multiple of the MemSize, we can't
1133 // pair the operations together.
1134 assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
1135 "Offset should be a multiple of the stride!");
1136 PairedOffset /= MemSize;
1137 } else {
1138 PairedOffset *= MemSize;
1139 }
1140 }
1141
1142 // Which register is Rt and which is Rt2 depends on the offset order.
1143 // However, for pre load/stores the Rt should be the one of the pre
1144 // load/store.
1145 MachineInstr *RtMI, *Rt2MI;
1146 if (Offset == PairedOffset + OffsetStride &&
1148 RtMI = &*Paired;
1149 Rt2MI = &*I;
1150 // Here we swapped the assumption made for SExtIdx.
1151 // I.e., we turn ldp I, Paired into ldp Paired, I.
1152 // Update the index accordingly.
1153 if (SExtIdx != -1)
1154 SExtIdx = (SExtIdx + 1) % 2;
1155 } else {
1156 RtMI = &*I;
1157 Rt2MI = &*Paired;
1158 }
1159 int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
1160 // Scale the immediate offset, if necessary.
1161 if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
1162 assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
1163 "Unscaled offset cannot be scaled.");
1164 OffsetImm /= TII->getMemScale(*RtMI);
1165 }
1166
1167 // Construct the new instruction.
1168 MachineInstrBuilder MIB;
1169 DebugLoc DL = I->getDebugLoc();
1170 MachineBasicBlock *MBB = I->getParent();
1171 MachineOperand RegOp0 = getLdStRegOp(*RtMI);
1172 MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
1173 MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;
1174 // Kill flags may become invalid when moving stores for pairing.
1175 if (RegOp0.isUse()) {
1176 if (!MergeForward) {
1177 // Clear kill flags on store if moving upwards. Example:
1178 // STRWui kill %w0, ...
1179 // USE %w1
1180 // STRWui kill %w1 ; need to clear kill flag when moving STRWui upwards
1181 // We are about to move the store of w1, so its kill flag may become
1182 // invalid; not the case for w0.
1183 // Since w1 is used between the stores, the kill flag on w1 is cleared
1184 // after merging.
1185 // STPWi kill %w0, %w1, ...
1186 // USE %w1
1187 for (auto It = std::next(I); It != Paired && PairedRegOp.isKill(); ++It)
1188 if (It->readsRegister(PairedRegOp.getReg(), TRI))
1189 PairedRegOp.setIsKill(false);
1190 } else {
1191 // Clear kill flags of the first store's register. Example:
1192 // STRWui %w1, ...
1193 // USE kill %w1 ; need to clear kill flag when moving STRWui downwards
1194 // STRW %w0
1195 Register Reg = getLdStRegOp(*I).getReg();
1196 for (MachineInstr &MI :
1197 make_range(std::next(I->getIterator()), Paired->getIterator()))
1198 MI.clearRegisterKills(Reg, TRI);
1199 }
1200 }
1201
1202 unsigned int MatchPairOpcode = getMatchingPairOpcode(Opc);
1203 MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(MatchPairOpcode));
1204
1205 // Adds the pre-index operand for pre-indexed ld/st pairs.
1206 if (AArch64InstrInfo::isPreLdSt(*RtMI))
1207 MIB.addReg(BaseRegOp.getReg(), RegState::Define);
1208
1209 MIB.add(RegOp0)
1210 .add(RegOp1)
1211 .add(BaseRegOp)
1212 .addImm(OffsetImm)
1213 .cloneMergedMemRefs({&*I, &*Paired})
1214 .setMIFlags(I->mergeFlagsWith(*Paired));
1215
1216 (void)MIB;
1217
1218 LLVM_DEBUG(
1219 dbgs() << "Creating pair load/store. Replacing instructions:\n ");
1220 LLVM_DEBUG(I->print(dbgs()));
1221 LLVM_DEBUG(dbgs() << " ");
1222 LLVM_DEBUG(Paired->print(dbgs()));
1223 LLVM_DEBUG(dbgs() << " with instruction:\n ");
1224 if (SExtIdx != -1) {
1225 // Generate the sign extension for the proper result of the ldp.
1226 // I.e., with X1, that would be:
1227 // %w1 = KILL %w1, implicit-def %x1
1228 // %x1 = SBFMXri killed %x1, 0, 31
1229 MachineOperand &DstMO = MIB->getOperand(SExtIdx);
1230 // Right now, DstMO has the extended register, since it comes from an
1231 // extended opcode.
1232 Register DstRegX = DstMO.getReg();
1233 // Get the W variant of that register.
1234 Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
1235 // Update the result of LDP to use the W instead of the X variant.
1236 DstMO.setReg(DstRegW);
1237 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1238 LLVM_DEBUG(dbgs() << "\n");
1239 // Make the machine verifier happy by providing a definition for
1240 // the X register.
1241 // Insert this definition right after the generated LDP, i.e., before
1242 // InsertionPoint.
1243 MachineInstrBuilder MIBKill =
1244 BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
1245 .addReg(DstRegW)
1246 .addReg(DstRegX, RegState::Define);
1247 MIBKill->getOperand(2).setImplicit();
1248 // Create the sign extension.
1249 MachineInstrBuilder MIBSXTW =
1250 BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
1251 .addReg(DstRegX)
1252 .addImm(0)
1253 .addImm(31);
1254 (void)MIBSXTW;
1255
1256 // In the case of a sign-extend, where we have something like:
1257 // debugValueSubstitutions:[]
1258 // $w1 = LDRWui $x0, 1, debug-instr-number 1
1259 // DBG_INSTR_REF !7, dbg-instr-ref(1, 0), debug-location !9
1260 // $x0 = LDRSWui $x0, 0, debug-instr-number 2
1261 // DBG_INSTR_REF !8, dbg-instr-ref(2, 0), debug-location !9
1262
1263 // It will be converted to:
1264 // debugValueSubstitutions:[]
1265 // $w0, $w1 = LDPWi $x0, 0
1266 // $w0 = KILL $w0, implicit-def $x0
1267 // $x0 = SBFMXri $x0, 0, 31
1268 // DBG_INSTR_REF !7, dbg-instr-ref(1, 0), debug-location !9
1269 // DBG_INSTR_REF !8, dbg-instr-ref(2, 0), debug-location !9
1270
1271 // We want the final result to look like:
1272 // debugValueSubstitutions:
1273 // - { srcinst: 1, srcop: 0, dstinst: 4, dstop: 1, subreg: 0 }
1274 // - { srcinst: 2, srcop: 0, dstinst: 3, dstop: 0, subreg: 0 }
1275 // $w0, $w1 = LDPWi $x0, 0, debug-instr-number 4
1276 // $w0 = KILL $w0, implicit-def $x0
1277 // $x0 = SBFMXri $x0, 0, 31, debug-instr-number 3
1278 // DBG_INSTR_REF !7, dbg-instr-ref(1, 0), debug-location !9
1279 // DBG_INSTR_REF !8, dbg-instr-ref(2, 0), debug-location !9
1280
1281 // $x0 is where the final value is stored, so the sign extend (SBFMXri)
1282 // instruction contains the final value we care about; we give it a new
1283 // debug-instr-number 3. Likewise, $w1 contains a final value that we care
1284 // about, so the LDP instruction is also given a new
1285 // debug-instr-number 4. We have to add these substitutions to the
1286 // debugValueSubstitutions table. However, we also have to ensure that the
1287 // OpIndex that pointed to debug-instr-number 1 gets updated to 1, because
1288 // $w1 is the second operand of the LDP instruction.
1289
1290 if (I->peekDebugInstrNum()) {
1291 // If I is the instruction which got sign extended and has a
1292 // debug-instr-number, give the SBFMXri instruction a new
1293 // debug-instr-number, and update the debugValueSubstitutions table with
1294 // the new debug-instr-number and OpIndex pair. Otherwise, give the Merged
1295 // instruction a new debug-instr-number, and update the
1296 // debugValueSubstitutions table with the new debug-instr-number and
1297 // OpIndex pair.
1298 unsigned NewInstrNum;
1299 if (DstRegX == I->getOperand(0).getReg()) {
1300 NewInstrNum = MIBSXTW->getDebugInstrNum();
1301 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *I,
1302 *MIBSXTW);
1303 } else {
1304 NewInstrNum = MIB->getDebugInstrNum();
1305 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *I, *MIB);
1306 }
1307 }
1308 if (Paired->peekDebugInstrNum()) {
1309 // If Paired is the instruction which got sign extended and has a
1310 // debug-instr-number, give the SBFMXri instruction a new
1311 // debug-instr-number, and update the debugValueSubstitutions table with
1312 // the new debug-instr-number and OpIndex pair. Otherwise, give the Merged
1313 // instruction a new debug-instr-number, and update the
1314 // debugValueSubstitutions table with the new debug-instr-number and
1315 // OpIndex pair.
1316 unsigned NewInstrNum;
1317 if (DstRegX == Paired->getOperand(0).getReg()) {
1318 NewInstrNum = MIBSXTW->getDebugInstrNum();
1319 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *Paired,
1320 *MIBSXTW);
1321 } else {
1322 NewInstrNum = MIB->getDebugInstrNum();
1323 addDebugSubstitutionsToTable(MBB->getParent(), NewInstrNum, *Paired,
1324 *MIB);
1325 }
1326 }
1327
1328 LLVM_DEBUG(dbgs() << " Extend operand:\n ");
1329 LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
1330 } else if (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) {
1331 // We are combining SVE fill/spill to LDP/STP, so we need to use the Q
1332 // variant of the registers.
1333 MachineOperand &MOp0 = MIB->getOperand(0);
1334 MachineOperand &MOp1 = MIB->getOperand(1);
1335 assert(AArch64::ZPRRegClass.contains(MOp0.getReg()) &&
1336 AArch64::ZPRRegClass.contains(MOp1.getReg()) && "Invalid register.");
1337 MOp0.setReg(AArch64::Q0 + (MOp0.getReg() - AArch64::Z0));
1338 MOp1.setReg(AArch64::Q0 + (MOp1.getReg() - AArch64::Z0));
1339 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1340 } else {
1341
1342 // In the case that the merge doesn't result in a sign-extend, if we have
1343 // something like:
1344 // debugValueSubstitutions:[]
1345 // $x1 = LDRXui $x0, 1, debug-instr-number 1
1346 // DBG_INSTR_REF !13, dbg-instr-ref(1, 0), debug-location !11
1347 // $x0 = LDRXui killed $x0, 0, debug-instr-number 2
1348 // DBG_INSTR_REF !14, dbg-instr-ref(2, 0), debug-location !11
1349
1350 // It will be converted to:
1351 // debugValueSubstitutions: []
1352 // $x0, $x1 = LDPXi $x0, 0
1353 // DBG_INSTR_REF !12, dbg-instr-ref(1, 0), debug-location !14
1354 // DBG_INSTR_REF !13, dbg-instr-ref(2, 0), debug-location !14
1355
1356 // We want the final result to look like:
1357 // debugValueSubstitutions:
1358 // - { srcinst: 1, srcop: 0, dstinst: 3, dstop: 1, subreg: 0 }
1359 // - { srcinst: 2, srcop: 0, dstinst: 3, dstop: 0, subreg: 0 }
1360 // $x0, $x1 = LDPXi $x0, 0, debug-instr-number 3
1361 // DBG_INSTR_REF !12, dbg-instr-ref(1, 0), debug-location !14
1362 // DBG_INSTR_REF !12, dbg-instr-ref(2, 0), debug-location !14
1363
1364 // Here all that needs to be done is to give the LDP instruction a new
1365 // debug-instr-number; we then need to add entries into the
1366 // debugValueSubstitutions table to map the old instr-refs to the new ones.
1367
1368 // Assign a new DebugInstrNum to the merged instruction and record the substitutions.
1369 if (I->peekDebugInstrNum()) {
1370 unsigned NewDebugInstrNum = MIB->getDebugInstrNum();
1371 addDebugSubstitutionsToTable(MBB->getParent(), NewDebugInstrNum, *I,
1372 *MIB);
1373 }
1374 if (Paired->peekDebugInstrNum()) {
1375 unsigned NewDebugInstrNum = MIB->getDebugInstrNum();
1376 addDebugSubstitutionsToTable(MBB->getParent(), NewDebugInstrNum, *Paired,
1377 *MIB);
1378 }
1379
1380 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1381 }
1382 LLVM_DEBUG(dbgs() << "\n");
1383
1384 if (MergeForward)
1385 for (const MachineOperand &MOP : phys_regs_and_masks(*I))
1386 if (MOP.isReg() && MOP.isKill())
1387 DefinedInBB.addReg(MOP.getReg());
1388
1389 // Copy over any implicit-def operands. This is like MI.copyImplicitOps, but
1390 // only copies implicit defs and makes sure that each operand is only added
1391 // once in case of duplicates.
1392 auto CopyImplicitOps = [&](MachineBasicBlock::iterator MI1,
1394 SmallSetVector<Register, 4> Ops;
1395 for (const MachineOperand &MO :
1396 llvm::drop_begin(MI1->operands(), MI1->getDesc().getNumOperands()))
1397 if (MO.isReg() && MO.isImplicit() && MO.isDef())
1398 Ops.insert(MO.getReg());
1399 for (const MachineOperand &MO :
1400 llvm::drop_begin(MI2->operands(), MI2->getDesc().getNumOperands()))
1401 if (MO.isReg() && MO.isImplicit() && MO.isDef())
1402 Ops.insert(MO.getReg());
1403 for (auto Op : Ops)
1404 MIB.addDef(Op, RegState::Implicit);
1405 };
1406 CopyImplicitOps(I, Paired);
1407
1408 // Erase the old instructions.
1409 I->eraseFromParent();
1410 Paired->eraseFromParent();
1411
1412 return NextI;
1413}
1414
1416AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
1417 MachineBasicBlock::iterator StoreI) {
1418 MachineBasicBlock::iterator NextI =
1419 next_nodbg(LoadI, LoadI->getParent()->end());
1420
1421 int LoadSize = TII->getMemScale(*LoadI);
1422 int StoreSize = TII->getMemScale(*StoreI);
1423 Register LdRt = getLdStRegOp(*LoadI).getReg();
1424 const MachineOperand &StMO = getLdStRegOp(*StoreI);
1425 Register StRt = getLdStRegOp(*StoreI).getReg();
1426 bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
1427
1428 assert((IsStoreXReg ||
1429 TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
1430 "Unexpected RegClass");
1431
1432 MachineInstr *BitExtMI;
1433 if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
1434 // Remove the load if the destination register of the load is the same
1435 // register as the one holding the stored value.
1436 if (StRt == LdRt && LoadSize == 8) {
1437 for (MachineInstr &MI : make_range(StoreI->getIterator(),
1438 LoadI->getIterator())) {
1439 if (MI.killsRegister(StRt, TRI)) {
1440 MI.clearRegisterKills(StRt, TRI);
1441 break;
1442 }
1443 }
1444 LLVM_DEBUG(dbgs() << "Remove load instruction:\n ");
1445 LLVM_DEBUG(LoadI->print(dbgs()));
1446 LLVM_DEBUG(dbgs() << "\n");
1447 LoadI->eraseFromParent();
1448 return NextI;
1449 }
1450 // Replace the load with a mov if the load and store are of the same size.
1451 BitExtMI =
1452 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1453 TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
1454 .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
1455 .add(StMO)
1457 .setMIFlags(LoadI->getFlags());
1458 } else {
1459 // FIXME: Currently we disable this transformation in big-endian targets as
1460 // performance and correctness are verified only in little-endian.
1461 if (!Subtarget->isLittleEndian())
1462 return NextI;
1463 bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
1464 assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
1465 "Unsupported ld/st match");
1466 assert(LoadSize <= StoreSize && "Invalid load size");
1467 int UnscaledLdOffset =
1468 IsUnscaled
1469 ? AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm()
1470 : AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() * LoadSize;
1471 int UnscaledStOffset =
1472 IsUnscaled
1473 ? AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm()
1474 : AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() * StoreSize;
1475 int Width = LoadSize * 8;
1476 Register DestReg =
1477 IsStoreXReg ? Register(TRI->getMatchingSuperReg(
1478 LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
1479 : LdRt;
1480
1481 assert((UnscaledLdOffset >= UnscaledStOffset &&
1482 (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
1483 "Invalid offset");
1484
1485 int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
1486 int Imms = Immr + Width - 1;
1487 if (UnscaledLdOffset == UnscaledStOffset) {
1488 uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
1489 | ((Immr) << 6) // immr
1490 | ((Imms) << 0) // imms
1491 ;
1492
1493 BitExtMI =
1494 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1495 TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
1496 DestReg)
1497 .add(StMO)
1498 .addImm(AndMaskEncoded)
1499 .setMIFlags(LoadI->getFlags());
1500 } else if (IsStoreXReg && Imms == 31) {
1501 // Use the 32 bit variant of UBFM if it's the LSR alias of the
1502 // instruction.
1503 assert(Immr <= Imms && "Expected LSR alias of UBFM");
1504 BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1505 TII->get(AArch64::UBFMWri),
1506 TRI->getSubReg(DestReg, AArch64::sub_32))
1507 .addReg(TRI->getSubReg(StRt, AArch64::sub_32))
1508 .addImm(Immr)
1509 .addImm(Imms)
1510 .setMIFlags(LoadI->getFlags());
1511 } else {
1512 BitExtMI =
1513 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1514 TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
1515 DestReg)
1516 .add(StMO)
1517 .addImm(Immr)
1518 .addImm(Imms)
1519 .setMIFlags(LoadI->getFlags());
1520 }
1521 }
1522
1523 // Clear kill flags between store and load.
1524 for (MachineInstr &MI : make_range(StoreI->getIterator(),
1525 BitExtMI->getIterator()))
1526 if (MI.killsRegister(StRt, TRI)) {
1527 MI.clearRegisterKills(StRt, TRI);
1528 break;
1529 }
1530
1531 LLVM_DEBUG(dbgs() << "Promoting load by replacing :\n ");
1532 LLVM_DEBUG(StoreI->print(dbgs()));
1533 LLVM_DEBUG(dbgs() << " ");
1534 LLVM_DEBUG(LoadI->print(dbgs()));
1535 LLVM_DEBUG(dbgs() << " with instructions:\n ");
1536 LLVM_DEBUG(StoreI->print(dbgs()));
1537 LLVM_DEBUG(dbgs() << " ");
1538 LLVM_DEBUG((BitExtMI)->print(dbgs()));
1539 LLVM_DEBUG(dbgs() << "\n");
1540
1541 // Erase the old instructions.
1542 LoadI->eraseFromParent();
1543 return NextI;
1544}
1545
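// Return true if the offset fits in the signed 7-bit scaled immediate range
// [-64, 63] used by paired load/store instructions (converting a byte offset
// from an unscaled instruction into units of the memory scale first).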
1546static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
1547 // Convert the byte-offset used by unscaled into an "element" offset used
1548 // by the scaled pair load/store instructions.
1549 if (IsUnscaled) {
1550 // If the byte-offset isn't a multiple of the stride, there's no point
1551 // trying to match it.
1552 if (Offset % OffsetStride)
1553 return false;
1554 Offset /= OffsetStride;
1555 }
1556 return Offset <= 63 && Offset >= -64;
1557}
1558
1559// Do alignment, specialized to power of 2 and for signed ints,
1560 // avoiding having to do a C-style cast from uint64_t to int when
1561// using alignTo from include/llvm/Support/MathExtras.h.
1562// FIXME: Move this function to include/MathExtras.h?
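// For example, alignTo(5, 4) == 8 and alignTo(-3, 4) == 0.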
1563static int alignTo(int Num, int PowOf2) {
1564 return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
1565}
1566
1567static bool mayAlias(MachineInstr &MIa,
1568 SmallVectorImpl<MachineInstr *> &MemInsns,
1569 AliasAnalysis *AA) {
1570 for (MachineInstr *MIb : MemInsns) {
1571 if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) {
1572 LLVM_DEBUG(dbgs() << "Aliasing with: "; MIb->dump());
1573 return true;
1574 }
1575 }
1576
1577 LLVM_DEBUG(dbgs() << "No aliases found\n");
1578 return false;
1579}
1580
1581bool AArch64LoadStoreOpt::findMatchingStore(
1582 MachineBasicBlock::iterator I, unsigned Limit,
1583 MachineBasicBlock::iterator &StoreI) {
1584 MachineBasicBlock::iterator B = I->getParent()->begin();
1585 MachineBasicBlock::iterator MBBI = I;
1586 MachineInstr &LoadMI = *I;
1587 Register BaseReg = AArch64InstrInfo::getLdStBaseOp(LoadMI).getReg();
1588
1589 // If the load is the first instruction in the block, there's obviously
1590 // not any matching store.
1591 if (MBBI == B)
1592 return false;
1593
1594 // Track which register units have been modified and used between the first
1595 // insn and the second insn.
1596 ModifiedRegUnits.clear();
1597 UsedRegUnits.clear();
1598
1599 unsigned Count = 0;
1600 do {
1601 MBBI = prev_nodbg(MBBI, B);
1602 MachineInstr &MI = *MBBI;
1603
1604 // Don't count transient instructions towards the search limit since there
1605 // may be different numbers of them if e.g. debug information is present.
1606 if (!MI.isTransient())
1607 ++Count;
1608
1609 // If the load instruction reads directly from the address to which the
1610 // store instruction writes and the stored value is not modified, we can
1611 // promote the load. Since we do not handle stores with pre-/post-index,
1612 // it's unnecessary to check if BaseReg is modified by the store itself.
1613 // Also, we can't handle stores without an immediate offset operand, since
1614 // the operand might instead be the address of a global variable.
1615 if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
1618 isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
1619 ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
1620 StoreI = MBBI;
1621 return true;
1622 }
1623
1624 if (MI.isCall())
1625 return false;
1626
1627 // Update modified / used register units.
1628 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1629
1630 // Otherwise, if the base register is modified, we have no match, so
1631 // return early.
1632 if (!ModifiedRegUnits.available(BaseReg))
1633 return false;
1634
1635 // If we encounter a store aliased with the load, return early.
1636 if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA*/ false))
1637 return false;
1638 } while (MBBI != B && Count < Limit);
1639 return false;
1640}
1641
1642static bool needsWinCFI(const MachineFunction *MF) {
1643 return MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
1644 MF->getFunction().needsUnwindTableEntry();
1645 }
1646
1647// Returns true if FirstMI and MI are candidates for merging or pairing.
1648// Otherwise, returns false.
1649 static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
1650 LdStPairFlags &Flags,
1651 const AArch64InstrInfo *TII) {
1652 // If this is volatile or if pairing is suppressed, not a candidate.
1653 if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
1654 return false;
1655
1656 // We should have already checked FirstMI for pair suppression and volatility.
1657 assert(!FirstMI.hasOrderedMemoryRef() &&
1658 !TII->isLdStPairSuppressed(FirstMI) &&
1659 "FirstMI shouldn't get here if either of these checks are true.");
1660
1661 if (needsWinCFI(MI.getMF()) && (MI.getFlag(MachineInstr::FrameSetup) ||
1662 MI.getFlag(MachineInstr::FrameDestroy)))
1663 return false;
1664
1665 unsigned OpcA = FirstMI.getOpcode();
1666 unsigned OpcB = MI.getOpcode();
1667
1668 // Opcodes match: If the opcodes are pre ld/st there is nothing more to check.
1669 if (OpcA == OpcB)
1670 return !AArch64InstrInfo::isPreLdSt(FirstMI);
1671
1672 // Bail out if one of the opcodes is SVE fill/spill, as we currently don't
1673 // allow pairing them with other instructions.
1674 if (OpcA == AArch64::LDR_ZXI || OpcA == AArch64::STR_ZXI ||
1675 OpcB == AArch64::LDR_ZXI || OpcB == AArch64::STR_ZXI)
1676 return false;
1677
1678 // Two pre ld/st of different opcodes cannot be merged either
1679 if (AArch64InstrInfo::isPreLdSt(FirstMI) && AArch64InstrInfo::isPreLdSt(MI))
1680 return false;
1681
1682 // Try to match a sign-extended load/store with a zero-extended load/store.
1683 bool IsValidLdStrOpc, PairIsValidLdStrOpc;
1684 unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
1685 assert(IsValidLdStrOpc &&
1686 "Given Opc should be a Load or Store with an immediate");
1687 // OpcA will be the first instruction in the pair.
1688 if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
1689 Flags.setSExtIdx(NonSExtOpc == OpcA ? 1 : 0);
1690 return true;
1691 }
1692
1693 // If the second instruction isn't even a mergable/pairable load/store, bail
1694 // out.
1695 if (!PairIsValidLdStrOpc)
1696 return false;
1697
1698 // Narrow stores do not have a matching pair opcode, so constrain their
1699 // merging to zero stores.
1700 if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
1701 return getLdStRegOp(FirstMI).getReg() == AArch64::WZR &&
1702 getLdStRegOp(MI).getReg() == AArch64::WZR &&
1703 TII->getMemScale(FirstMI) == TII->getMemScale(MI);
1704
1705 // The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and
1706 // LDR<S,D,Q,W,X,SW>pre-LDR<S,D,Q,W,X,SW>ui
1707 // are candidate pairs that can be merged.
1708 if (isPreLdStPairCandidate(FirstMI, MI))
1709 return true;
1710
1711 // Try to match an unscaled load/store with a scaled load/store.
1712 return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) &&
1713 getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB);
1714
1715 // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
1716}
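// Illustrative note (not part of the upstream source): besides identical
// opcodes, a scaled and an unscaled access that map to the same pair opcode
// can also be candidates, e.g.
//   ldr  x0, [x1, #8]     ; LDRXui (scaled)
//   ldur x2, [x1, #16]    ; LDURXi (unscaled)
// The offsets themselves are only validated later, in findMatchingInsn.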
1717
1718static bool canRenameMOP(const MachineOperand &MOP,
1719 const TargetRegisterInfo *TRI) {
1720 if (MOP.isReg()) {
1721 auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
1722 // Renaming registers with multiple disjunct sub-registers (e.g. the
1723 // result of a LD3) means that all sub-registers are renamed, potentially
1724 // impacting other instructions we did not check. Bail out.
1725 // Note that this relies on the structure of the AArch64 register file. In
1726 // particular, a subregister cannot be written without overwriting the
1727 // whole register.
1728 if (RegClass->HasDisjunctSubRegs && RegClass->CoveredBySubRegs &&
1729 (TRI->getSubRegisterClass(RegClass, AArch64::dsub0) ||
1730 TRI->getSubRegisterClass(RegClass, AArch64::qsub0) ||
1731 TRI->getSubRegisterClass(RegClass, AArch64::zsub0))) {
1732 LLVM_DEBUG(
1733 dbgs()
1734 << " Cannot rename operands with multiple disjunct subregisters ("
1735 << MOP << ")\n");
1736 return false;
1737 }
1738
1739 // We cannot rename arbitrary implicit-defs, the specific rule to rewrite
1740 // them must be known. For example, in ORRWrs the implicit-def
1741 // corresponds to the result register.
1742 if (MOP.isImplicit() && MOP.isDef()) {
1743 if (!isRewritableImplicitDef(MOP.getParent()->getOpcode()))
1744 return false;
1745 return TRI->isSuperOrSubRegisterEq(
1746 MOP.getParent()->getOperand(0).getReg(), MOP.getReg());
1747 }
1748 }
1749 return MOP.isImplicit() ||
1750 (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
1751}
1752
1753static bool
1754canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
1755 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
1756 const TargetRegisterInfo *TRI) {
1757 if (!FirstMI.mayStore())
1758 return false;
1759
1760 // Check if we can find an unused register which we can use to rename
1761 // the register used by the first load/store.
1762
1763 auto RegToRename = getLdStRegOp(FirstMI).getReg();
1764 // For now, we only rename if the store operand gets killed at the store.
1765 if (!getLdStRegOp(FirstMI).isKill() &&
1766 !any_of(FirstMI.operands(),
1767 [TRI, RegToRename](const MachineOperand &MOP) {
1768 return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1769 MOP.isImplicit() && MOP.isKill() &&
1770 TRI->regsOverlap(RegToRename, MOP.getReg());
1771 })) {
1772 LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI);
1773 return false;
1774 }
1775
1776 bool FoundDef = false;
1777
1778 // For each instruction between FirstMI and the previous def for RegToRename,
1779 // we
1780 // * check if we can rename RegToRename in this instruction
1781 // * collect the registers used and required register classes for RegToRename.
1782 std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
1783 bool IsDef) {
1784 LLVM_DEBUG(dbgs() << "Checking " << MI);
1785 // Currently we do not try to rename across frame-setup instructions.
1786 if (MI.getFlag(MachineInstr::FrameSetup)) {
1787 LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions "
1788 << "currently\n");
1789 return false;
1790 }
1791
1792 UsedInBetween.accumulate(MI);
1793
1794 // For a definition, check that we can rename the definition and exit the
1795 // loop.
1796 FoundDef = IsDef;
1797
1798 // For defs, check if we can rename the first def of RegToRename.
1799 if (FoundDef) {
1800 // For some pseudo instructions, we might not generate code in the end
1801 // (e.g. KILL) and we would end up without a correct def for the rename
1802 // register.
1803 // TODO: This might be overly conservative and we could handle those cases
1804 // in multiple ways:
1805 // 1. Insert an extra copy, to materialize the def.
1806 // 2. Skip pseudo-defs until we find a non-pseudo def.
1807 if (MI.isPseudo()) {
1808 LLVM_DEBUG(dbgs() << " Cannot rename pseudo/bundle instruction\n");
1809 return false;
1810 }
1811
1812 for (auto &MOP : MI.operands()) {
1813 if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||
1814 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1815 continue;
1816 if (!canRenameMOP(MOP, TRI)) {
1817 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1818 return false;
1819 }
1820 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1821 }
1822 return true;
1823 } else {
1824 for (auto &MOP : MI.operands()) {
1825 if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1826 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1827 continue;
1828
1829 if (!canRenameMOP(MOP, TRI)) {
1830 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1831 return false;
1832 }
1833 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1834 }
1835 }
1836 return true;
1837 };
1838
1839 if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
1840 return false;
1841
1842 if (!FoundDef) {
1843 LLVM_DEBUG(dbgs() << " Did not find definition for register in BB\n");
1844 return false;
1845 }
1846 return true;
1847}
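// Illustrative sketch (not part of the upstream source): renaming can enable a
// forward store merge when the first store's data register is clobbered before
// the second store, e.g.
//   str w8, [x0]          ; w8 killed here
//   mov w8, #1            ; w8 redefined in between
//   str w9, [x0, #4]
// If every use/def of w8 from its previous def up to the first store can be
// rewritten to a free register (say w12), the two stores can then be paired as
//   stp w12, w9, [x0]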
1848
1849// We want to merge the second load into the first by rewriting the usages of
1850// the same reg between first (incl.) and second (excl.). We don't need to care
1851// about any insns before FirstLoad or after SecondLoad.
1852// 1. The second load writes a new value into the same reg.
1853// - The renaming cannot impact later uses of the reg.
1854// - The second load always clobbers the value written by the first load, which
1855// means the reg must be killed before the second load.
1856// 2. The first load must be a def for the same reg so we don't need to look
1857// into anything before it.
1858static bool canRenameUntilSecondLoad(
1859 MachineInstr &FirstLoad, MachineInstr &SecondLoad,
1860 LiveRegUnits &UsedInBetween,
1861 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
1862 const TargetRegisterInfo *TRI) {
1863 if (FirstLoad.isPseudo())
1864 return false;
1865
1866 UsedInBetween.accumulate(FirstLoad);
1867 auto RegToRename = getLdStRegOp(FirstLoad).getReg();
1868 bool Success = std::all_of(
1869 FirstLoad.getIterator(), SecondLoad.getIterator(),
1870 [&](MachineInstr &MI) {
1871 LLVM_DEBUG(dbgs() << "Checking " << MI);
1872 // Currently we do not try to rename across frame-setup instructions.
1873 if (MI.getFlag(MachineInstr::FrameSetup)) {
1874 LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions "
1875 << "currently\n");
1876 return false;
1877 }
1878
1879 for (auto &MOP : MI.operands()) {
1880 if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1881 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1882 continue;
1883 if (!canRenameMOP(MOP, TRI)) {
1884 LLVM_DEBUG(dbgs() << " Cannot rename " << MOP << " in " << MI);
1885 return false;
1886 }
1887 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1888 }
1889
1890 return true;
1891 });
1892 return Success;
1893}
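// Illustrative sketch (not part of the upstream source): for two loads into
// the same register, e.g.
//   ldr x8, [x0]          ; first load, x8 killed before the second load
//   add x2, x2, x8        ; intermediate use of x8
//   ldr x8, [x0, #8]      ; second load overwrites x8
// renaming x8 in the first load and in its uses up to (but excluding) the
// second load to a free register, say x9, allows forming
//   ldp x9, x8, [x0]
// at the position of the first load.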
1894
1895// Check if we can find a physical register for renaming \p Reg. This register
1896// must:
1897// * not be defined already in \p DefinedInBB; DefinedInBB must contain all
1898// defined registers up to the point where the renamed register will be used,
1899// * not used in \p UsedInBetween; UsedInBetween must contain all accessed
1900// registers in the range the rename register will be used,
1901// * is available in all used register classes (checked using RequiredClasses).
1902static std::optional<MCPhysReg> tryToFindRegisterToRename(
1903 const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB,
1904 LiveRegUnits &UsedInBetween,
1905 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
1906 const TargetRegisterInfo *TRI) {
1907 const MachineRegisterInfo &RegInfo = MF.getRegInfo();
1908
1909 // Checks if any sub- or super-register of PR is callee saved.
1910 auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
1911 return any_of(TRI->sub_and_superregs_inclusive(PR),
1912 [&MF, TRI](MCPhysReg SubOrSuper) {
1913 return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
1914 });
1915 };
1916
1917 // Check if PR or one of its sub- or super-registers can be used for all
1918 // required register classes.
1919 auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
1920 return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
1921 return any_of(
1922 TRI->sub_and_superregs_inclusive(PR),
1923 [C](MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); });
1924 });
1925 };
1926
1927 auto *RegClass = TRI->getMinimalPhysRegClass(Reg);
1928 for (const MCPhysReg &PR : *RegClass) {
1929 if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
1930 !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
1931 CanBeUsedForAllClasses(PR)) {
1932 DefinedInBB.addReg(PR);
1933 LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
1934 << "\n");
1935 return {PR};
1936 }
1937 }
1938 LLVM_DEBUG(dbgs() << "No rename register found from "
1939 << TRI->getRegClassName(RegClass) << "\n");
1940 return std::nullopt;
1941}
1942
1943// For store pairs: returns a register from FirstMI to the beginning of the
1944// block that can be renamed.
1945// For load pairs: returns a register from FirstMI to MI that can be renamed.
1946static std::optional<MCPhysReg> findRenameRegForSameLdStRegPair(
1947 std::optional<bool> MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI,
1948 Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween,
1949 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
1950 const TargetRegisterInfo *TRI) {
1951 std::optional<MCPhysReg> RenameReg;
1952 if (!DebugCounter::shouldExecute(RegRenamingCounter))
1953 return RenameReg;
1954
1955 auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
1956 MachineFunction &MF = *FirstMI.getParent()->getParent();
1957 if (!RegClass || !MF.getRegInfo().tracksLiveness())
1958 return RenameReg;
1959
1960 const bool IsLoad = FirstMI.mayLoad();
1961
1962 if (!MaybeCanRename) {
1963 if (IsLoad)
1964 MaybeCanRename = {canRenameUntilSecondLoad(FirstMI, MI, UsedInBetween,
1965 RequiredClasses, TRI)};
1966 else
1967 MaybeCanRename = {
1968 canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)};
1969 }
1970
1971 if (*MaybeCanRename) {
1972 RenameReg = tryToFindRegisterToRename(MF, Reg, DefinedInBB, UsedInBetween,
1973 RequiredClasses, TRI);
1974 }
1975 return RenameReg;
1976}
1977
1978/// Scan the instructions looking for a load/store that can be combined with the
1979/// current instruction into a wider equivalent or a load/store pair.
1980MachineBasicBlock::iterator
1981AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
1982 LdStPairFlags &Flags, unsigned Limit,
1983 bool FindNarrowMerge) {
1984 MachineBasicBlock::iterator E = I->getParent()->end();
1985 MachineBasicBlock::iterator MBBI = I;
1986 MachineBasicBlock::iterator MBBIWithRenameReg;
1987 MachineInstr &FirstMI = *I;
1988 MBBI = next_nodbg(MBBI, E);
1989
1990 bool MayLoad = FirstMI.mayLoad();
1991 bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
1992 Register Reg = getLdStRegOp(FirstMI).getReg();
1993 Register BaseReg = AArch64InstrInfo::getLdStBaseOp(FirstMI).getReg();
1994 int Offset = AArch64InstrInfo::getLdStOffsetOp(FirstMI).getImm();
1995 int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
1996 bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
1997
1998 std::optional<bool> MaybeCanRename;
1999 if (!EnableRenaming)
2000 MaybeCanRename = {false};
2001
2002 SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
2003 LiveRegUnits UsedInBetween;
2004 UsedInBetween.init(*TRI);
2005
2006 Flags.clearRenameReg();
2007
2008 // Track which register units have been modified and used between the first
2009 // insn (inclusive) and the second insn.
2010 ModifiedRegUnits.clear();
2011 UsedRegUnits.clear();
2012
2013 // Remember any instructions that read/write memory between FirstMI and MI.
2014 SmallVector<MachineInstr *, 4> MemInsns;
2015
2016 LLVM_DEBUG(dbgs() << "Find match for: "; FirstMI.dump());
2017 for (unsigned Count = 0; MBBI != E && Count < Limit;
2018 MBBI = next_nodbg(MBBI, E)) {
2019 MachineInstr &MI = *MBBI;
2020 LLVM_DEBUG(dbgs() << "Analysing 2nd insn: "; MI.dump());
2021
2022 UsedInBetween.accumulate(MI);
2023
2024 // Don't count transient instructions towards the search limit since there
2025 // may be different numbers of them if e.g. debug information is present.
2026 if (!MI.isTransient())
2027 ++Count;
2028
2029 Flags.setSExtIdx(-1);
2030 if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
2031 AArch64InstrInfo::getLdStOffsetOp(MI).isImm()) {
2032 assert(MI.mayLoadOrStore() && "Expected memory operation.");
2033 // If we've found another instruction with the same opcode, check to see
2034 // if the base and offset are compatible with our starting instruction.
2035 // These instructions all have scaled immediate operands, so we just
2036 // check for +1/-1. Make sure to check the new instruction offset is
2037 // actually an immediate and not a symbolic reference destined for
2038 // a relocation.
2039 Register MIBaseReg = AArch64InstrInfo::getLdStBaseOp(MI).getReg();
2040 int MIOffset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
2041 bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
2042 if (IsUnscaled != MIIsUnscaled) {
2043 // We're trying to pair instructions that differ in how they are scaled.
2044 // If FirstMI is scaled then scale the offset of MI accordingly.
2045 // Otherwise, do the opposite (i.e., make MI's offset unscaled).
2046 int MemSize = TII->getMemScale(MI);
2047 if (MIIsUnscaled) {
2048 // If the unscaled offset isn't a multiple of the MemSize, we can't
2049 // pair the operations together: bail and keep looking.
2050 if (MIOffset % MemSize) {
2051 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2052 UsedRegUnits, TRI);
2053 MemInsns.push_back(&MI);
2054 continue;
2055 }
2056 MIOffset /= MemSize;
2057 } else {
2058 MIOffset *= MemSize;
2059 }
2060 }
2061
2062 bool IsPreLdSt = isPreLdStPairCandidate(FirstMI, MI);
2063
2064 if (BaseReg == MIBaseReg) {
2065 // If the offset of the second ld/st is not equal to the size of the
2066 // destination register it can't be paired with a pre-index ld/st
2067 // pair. Additionally if the base reg is used or modified the operations
2068 // can't be paired: bail and keep looking.
2069 if (IsPreLdSt) {
2070 bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
2071 bool IsBaseRegUsed = !UsedRegUnits.available(
2072 AArch64InstrInfo::getLdStBaseOp(MI).getReg());
2073 bool IsBaseRegModified = !ModifiedRegUnits.available(
2074 AArch64InstrInfo::getLdStBaseOp(MI).getReg());
2075 // If the stored value and the address of the second instruction are
2076 // the same, the second instruction needs to use the updated register
2077 // and therefore it must not be folded.
2078 bool IsMIRegTheSame =
2079 TRI->regsOverlap(getLdStRegOp(MI).getReg(),
2080 AArch64InstrInfo::getLdStBaseOp(MI).getReg());
2081 if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
2082 IsMIRegTheSame) {
2083 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2084 UsedRegUnits, TRI);
2085 MemInsns.push_back(&MI);
2086 continue;
2087 }
2088 } else {
2089 if ((Offset != MIOffset + OffsetStride) &&
2090 (Offset + OffsetStride != MIOffset)) {
2091 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2092 UsedRegUnits, TRI);
2093 MemInsns.push_back(&MI);
2094 continue;
2095 }
2096 }
2097
2098 int MinOffset = Offset < MIOffset ? Offset : MIOffset;
2099 if (FindNarrowMerge) {
2100 // If the alignment requirements of the scaled wide load/store
2101 // instruction can't express the offset of the scaled narrow input,
2102 // bail and keep looking. For promotable zero stores, allow only when
2103 // the stored value is the same (i.e., WZR).
2104 if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
2105 (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
2106 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2107 UsedRegUnits, TRI);
2108 MemInsns.push_back(&MI);
2109 continue;
2110 }
2111 } else {
2112 // Pairwise instructions have a 7-bit signed offset field. Single
2113 // insns have a 12-bit unsigned offset field. If the resultant
2114 // immediate offset of merging these instructions is out of range for
2115 // a pairwise instruction, bail and keep looking.
2116 if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
2117 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2118 UsedRegUnits, TRI);
2119 MemInsns.push_back(&MI);
2120 LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, "
2121 << "keep looking.\n");
2122 continue;
2123 }
2124 // If the alignment requirements of the paired (scaled) instruction
2125 // can't express the offset of the unscaled input, bail and keep
2126 // looking.
2127 if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
2128 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2129 UsedRegUnits, TRI);
2130 MemInsns.push_back(&MI);
2131 LLVM_DEBUG(dbgs()
2132 << "Offset doesn't fit due to alignment requirements, "
2133 << "keep looking.\n");
2134 continue;
2135 }
2136 }
2137
2138 // If the BaseReg has been modified, then we cannot do the optimization.
2139 // For example, in the following pattern
2140 // ldr x1 [x2]
2141 // ldr x2 [x3]
2142 // ldr x4 [x2, #8],
2143 // the first and third ldr cannot be converted to ldp x1, x4, [x2]
2144 if (!ModifiedRegUnits.available(BaseReg))
2145 return E;
2146
2147 const bool SameLoadReg = MayLoad && TRI->isSuperOrSubRegisterEq(
2148 Reg, getLdStRegOp(MI).getReg());
2149
2150 // If the Rt of the second instruction (destination register of the
2151 // load) was not modified or used between the two instructions and none
2152 // of the instructions between the second and first alias with the
2153 // second, we can combine the second into the first.
2154 bool RtNotModified =
2155 ModifiedRegUnits.available(getLdStRegOp(MI).getReg());
2156 bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg &&
2157 !UsedRegUnits.available(getLdStRegOp(MI).getReg()));
2158
2159 LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n"
2160 << "Reg '" << getLdStRegOp(MI) << "' not modified: "
2161 << (RtNotModified ? "true" : "false") << "\n"
2162 << "Reg '" << getLdStRegOp(MI) << "' not used: "
2163 << (RtNotUsed ? "true" : "false") << "\n");
2164
2165 if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) {
2166 // For pairs loading into the same reg, try to find a renaming
2167 // opportunity to allow the renaming of Reg between FirstMI and MI
2168 // and combine MI into FirstMI; otherwise bail and keep looking.
2169 if (SameLoadReg) {
2170 std::optional<MCPhysReg> RenameReg =
2171 findRenameRegForSameLdStRegPair(MaybeCanRename, FirstMI, MI,
2172 Reg, DefinedInBB, UsedInBetween,
2173 RequiredClasses, TRI);
2174 if (!RenameReg) {
2175 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
2176 UsedRegUnits, TRI);
2177 MemInsns.push_back(&MI);
2178 LLVM_DEBUG(dbgs() << "Can't find reg for renaming, "
2179 << "keep looking.\n");
2180 continue;
2181 }
2182 Flags.setRenameReg(*RenameReg);
2183 }
2184
2185 Flags.setMergeForward(false);
2186 if (!SameLoadReg)
2187 Flags.clearRenameReg();
2188 return MBBI;
2189 }
2190
2191 // Likewise, if the Rt of the first instruction is not modified or used
2192 // between the two instructions and none of the instructions between the
2193 // first and the second alias with the first, we can combine the first
2194 // into the second.
2195 RtNotModified = !(
2196 MayLoad && !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg()));
2197
2198 LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n"
2199 << "Reg '" << getLdStRegOp(FirstMI)
2200 << "' not modified: "
2201 << (RtNotModified ? "true" : "false") << "\n");
2202
2203 if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) {
2204 if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
2205 Flags.setMergeForward(true);
2206 Flags.clearRenameReg();
2207 return MBBI;
2208 }
2209
2210 std::optional<MCPhysReg> RenameReg = findRenameRegForSameLdStRegPair(
2211 MaybeCanRename, FirstMI, MI, Reg, DefinedInBB, UsedInBetween,
2212 RequiredClasses, TRI);
2213 if (RenameReg) {
2214 Flags.setMergeForward(true);
2215 Flags.setRenameReg(*RenameReg);
2216 MBBIWithRenameReg = MBBI;
2217 }
2218 }
2219 LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to "
2220 << "interference in between, keep looking.\n");
2221 }
2222 }
2223
2224 if (Flags.getRenameReg())
2225 return MBBIWithRenameReg;
2226
2227 // The instruction wasn't a matching load or store. Stop searching if we
2228 // encounter a call instruction that might modify memory.
2229 if (MI.isCall()) {
2230 LLVM_DEBUG(dbgs() << "Found a call, stop looking.\n");
2231 return E;
2232 }
2233
2234 // Update modified / uses register units.
2235 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2236
2237 // Otherwise, if the base register is modified, we have no match, so
2238 // return early.
2239 if (!ModifiedRegUnits.available(BaseReg)) {
2240 LLVM_DEBUG(dbgs() << "Base reg is modified, stop looking.\n");
2241 return E;
2242 }
2243
2244 // Update list of instructions that read/write memory.
2245 if (MI.mayLoadOrStore())
2246 MemInsns.push_back(&MI);
2247 }
2248 return E;
2249}
2250
2251static MachineBasicBlock::iterator maybeMoveCFI(MachineInstr &MI,
2252 MachineBasicBlock::iterator MaybeCFI) {
2253 assert((MI.getOpcode() == AArch64::SUBXri ||
2254 MI.getOpcode() == AArch64::ADDXri) &&
2255 "Expected a register update instruction");
2256 auto End = MI.getParent()->end();
2257 if (MaybeCFI == End ||
2258 MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
2259 !(MI.getFlag(MachineInstr::FrameSetup) ||
2260 MI.getFlag(MachineInstr::FrameDestroy)) ||
2261 MI.getOperand(0).getReg() != AArch64::SP)
2262 return End;
2263
2264 const MachineFunction &MF = *MI.getParent()->getParent();
2265 unsigned CFIIndex = MaybeCFI->getOperand(0).getCFIIndex();
2266 const MCCFIInstruction &CFI = MF.getFrameInstructions()[CFIIndex];
2267 switch (CFI.getOperation()) {
2268 case MCCFIInstruction::OpDefCfa:
2269 case MCCFIInstruction::OpDefCfaOffset:
2270 return MaybeCFI;
2271 default:
2272 return End;
2273 }
2274}
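// Illustrative sketch (not part of the upstream source): when an SP update is
// folded into a neighbouring load/store, a CFA-related CFI instruction that
// followed the update must still follow the instruction that now moves SP,
// e.g.
//   sub sp, sp, #32
//   .cfi_def_cfa_offset 32
//   str x0, [sp]
// ; after folding, the CFI is kept after the new pre-indexed store:
//   str x0, [sp, #-32]!
//   .cfi_def_cfa_offset 32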
2275
2276std::optional<MachineBasicBlock::iterator> AArch64LoadStoreOpt::mergeUpdateInsn(
2277 MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update,
2278 bool IsForward, bool IsPreIdx, bool MergeEither) {
2279 assert((Update->getOpcode() == AArch64::ADDXri ||
2280 Update->getOpcode() == AArch64::SUBXri) &&
2281 "Unexpected base register update instruction to merge!");
2282 MachineBasicBlock::iterator E = I->getParent()->end();
2283 MachineBasicBlock::iterator NextI = next_nodbg(I, E);
2284
2285 // If updating the SP and the following instruction is CFA offset related CFI,
2286 // make sure the CFI follows the SP update either by merging at the location
2287 // of the update or by moving the CFI after the merged instruction. If unable
2288 // to do so, bail.
2289 MachineBasicBlock::iterator InsertPt = I;
2290 if (IsForward) {
2291 assert(IsPreIdx);
2292 if (auto CFI = maybeMoveCFI(*Update, next_nodbg(Update, E)); CFI != E) {
2293 if (MergeEither) {
2294 InsertPt = Update;
2295 } else {
2296 // Take care not to reorder CFIs.
2297 if (std::any_of(std::next(CFI), I, [](const auto &Insn) {
2298 return Insn.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
2299 }))
2300 return std::nullopt;
2301
2302 MachineBasicBlock *MBB = InsertPt->getParent();
2303 MBB->splice(std::next(InsertPt), MBB, CFI);
2304 }
2305 }
2306 }
2307
2308 // Return the instruction following the merged instruction, which is
2309 // the instruction following our unmerged load. Unless that's the add/sub
2310 // instruction we're merging, in which case it's the one after that.
2311 if (NextI == Update)
2312 NextI = next_nodbg(NextI, E);
2313
2314 int Value = Update->getOperand(2).getImm();
2315 assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
2316 "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
2317 if (Update->getOpcode() == AArch64::SUBXri)
2318 Value = -Value;
2319
2320 unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
2321 : getPostIndexedOpcode(I->getOpcode());
2322 MachineInstrBuilder MIB;
2323 int Scale, MinOffset, MaxOffset;
2324 getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
2325 if (!AArch64InstrInfo::isPairedLdSt(*I)) {
2326 // Non-paired instruction.
2327 MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2328 TII->get(NewOpc))
2329 .add(Update->getOperand(0))
2330 .add(getLdStRegOp(*I))
2331 .add(AArch64InstrInfo::getLdStBaseOp(*I))
2332 .addImm(Value / Scale)
2333 .setMemRefs(I->memoperands())
2334 .setMIFlags(I->mergeFlagsWith(*Update));
2335 } else {
2336 // Paired instruction.
2337 MIB = BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
2338 TII->get(NewOpc))
2339 .add(Update->getOperand(0))
2340 .add(getLdStRegOp(*I, 0))
2341 .add(getLdStRegOp(*I, 1))
2342 .add(AArch64InstrInfo::getLdStBaseOp(*I))
2343 .addImm(Value / Scale)
2344 .setMemRefs(I->memoperands())
2345 .setMIFlags(I->mergeFlagsWith(*Update));
2346 }
2347
2348 if (IsPreIdx) {
2349 ++NumPreFolded;
2350 LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store.");
2351 } else {
2352 ++NumPostFolded;
2353 LLVM_DEBUG(dbgs() << "Creating post-indexed load/store.");
2354 }
2355 LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
2356 LLVM_DEBUG(I->print(dbgs()));
2357 LLVM_DEBUG(dbgs() << " ");
2358 LLVM_DEBUG(Update->print(dbgs()));
2359 LLVM_DEBUG(dbgs() << " with instruction:\n ");
2360 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
2361 LLVM_DEBUG(dbgs() << "\n");
2362
2363 // Erase the old instructions for the block.
2364 I->eraseFromParent();
2365 Update->eraseFromParent();
2366
2367 return NextI;
2368}
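// Illustrative sketch (not part of the upstream source, pair scaling assumed):
// merging a pre-increment into a paired store,
//   add x1, x1, #16
//   stp x2, x3, [x1]
// yields
//   stp x2, x3, [x1, #16]!
// where, with Scale == 8 for the 64-bit paired form, the builder above encodes
// Value / Scale == 2 as the immediate operand.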
2369
2370MachineBasicBlock::iterator
2371AArch64LoadStoreOpt::mergeConstOffsetInsn(MachineBasicBlock::iterator I,
2372 MachineBasicBlock::iterator Update,
2373 unsigned Offset, int Scale) {
2374 assert((Update->getOpcode() == AArch64::MOVKWi) &&
2375 "Unexpected const mov instruction to merge!");
2376 MachineBasicBlock::iterator E = I->getParent()->end();
2377 MachineBasicBlock::iterator NextI = next_nodbg(I, E);
2378 MachineBasicBlock::iterator PrevI = prev_nodbg(Update, E);
2379 MachineInstr &MemMI = *I;
2380 unsigned Mask = (1 << 12) * Scale - 1;
2381 unsigned Low = Offset & Mask;
2382 unsigned High = Offset - Low;
2383 Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
2384 Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();
2385 MachineInstrBuilder AddMIB, MemMIB;
2386
2387 // Add IndexReg, BaseReg, High (the BaseReg may be SP)
2388 AddMIB =
2389 BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri))
2390 .addDef(IndexReg)
2391 .addUse(BaseReg)
2392 .addImm(High >> 12) // shifted value
2393 .addImm(12); // shift 12
2394 (void)AddMIB;
2395 // Ld/St DestReg, IndexReg, Imm12
2396 unsigned NewOpc = getBaseAddressOpcode(I->getOpcode());
2397 MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
2398 .add(getLdStRegOp(MemMI))
2400 .addImm(Low / Scale)
2401 .setMemRefs(I->memoperands())
2402 .setMIFlags(I->mergeFlagsWith(*Update));
2403 (void)MemMIB;
2404
2405 ++NumConstOffsetFolded;
2406 LLVM_DEBUG(dbgs() << "Creating base address load/store.\n");
2407 LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
2408 LLVM_DEBUG(PrevI->print(dbgs()));
2409 LLVM_DEBUG(dbgs() << " ");
2410 LLVM_DEBUG(Update->print(dbgs()));
2411 LLVM_DEBUG(dbgs() << " ");
2412 LLVM_DEBUG(I->print(dbgs()));
2413 LLVM_DEBUG(dbgs() << " with instruction:\n ");
2414 LLVM_DEBUG(((MachineInstr *)AddMIB)->print(dbgs()));
2415 LLVM_DEBUG(dbgs() << " ");
2416 LLVM_DEBUG(((MachineInstr *)MemMIB)->print(dbgs()));
2417 LLVM_DEBUG(dbgs() << "\n");
2418
2419 // Erase the old instructions for the block.
2420 I->eraseFromParent();
2421 PrevI->eraseFromParent();
2422 Update->eraseFromParent();
2423
2424 return NextI;
2425}
2426
2427bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
2428 MachineInstr &MI,
2429 unsigned BaseReg, int Offset) {
2430 switch (MI.getOpcode()) {
2431 default:
2432 break;
2433 case AArch64::SUBXri:
2434 case AArch64::ADDXri:
2435 // Make sure it's a vanilla immediate operand, not a relocation or
2436 // anything else we can't handle.
2437 if (!MI.getOperand(2).isImm())
2438 break;
2439 // Watch out for 1 << 12 shifted value.
2440 if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
2441 break;
2442
2443 // The update instruction source and destination register must be the
2444 // same as the load/store base register.
2445 if (MI.getOperand(0).getReg() != BaseReg ||
2446 MI.getOperand(1).getReg() != BaseReg)
2447 break;
2448
2449 int UpdateOffset = MI.getOperand(2).getImm();
2450 if (MI.getOpcode() == AArch64::SUBXri)
2451 UpdateOffset = -UpdateOffset;
2452
2453 // The immediate must be a multiple of the scaling factor of the pre/post
2454 // indexed instruction.
2455 int Scale, MinOffset, MaxOffset;
2456 getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
2457 if (UpdateOffset % Scale != 0)
2458 break;
2459
2460 // Scaled offset must fit in the instruction immediate.
2461 int ScaledOffset = UpdateOffset / Scale;
2462 if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
2463 break;
2464
2465 // If we have a non-zero Offset, we check that it matches the amount
2466 // we're adding to the register.
2467 if (!Offset || Offset == UpdateOffset)
2468 return true;
2469 break;
2470 }
2471 return false;
2472}
2473
2474bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
2475 MachineInstr &MI,
2476 unsigned IndexReg,
2477 unsigned &Offset) {
2478 // The update instruction source and destination register must be the
2479 // same as the load/store index register.
2480 if (MI.getOpcode() == AArch64::MOVKWi &&
2481 TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) {
2482
2483 // A movz + movk pair holds a large offset for a Ld/St instruction.
2484 MachineBasicBlock::iterator B = MI.getParent()->begin();
2485 MachineBasicBlock::iterator MBBI = MI;
2486 // Bail out when MI is the first instruction of the block.
2487 if (MBBI == B)
2488 return false;
2489 MBBI = prev_nodbg(MBBI, B);
2490 MachineInstr &MovzMI = *MBBI;
2491 // Make sure the MOVKWi and MOVZWi set the same register.
2492 if (MovzMI.getOpcode() == AArch64::MOVZWi &&
2493 MovzMI.getOperand(0).getReg() == MI.getOperand(0).getReg()) {
2494 unsigned Low = MovzMI.getOperand(1).getImm();
2495 unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm();
2496 Offset = High + Low;
2497 // 12-bit optionally shifted immediates are legal for adds.
2498 return Offset >> 24 == 0;
2499 }
2500 }
2501 return false;
2502}
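// Illustrative example (not part of the upstream source): a matching pair of
// instructions materializing a large offset looks like
//   movz w8, #0x1234            ; Low  = 0x1234
//   movk w8, #0x5, lsl #16      ; High = 0x5 << 16 = 0x50000
// giving Offset = 0x51234, which is accepted since it fits in 24 bits.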
2503
2504MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
2505 MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
2506 MachineBasicBlock::iterator E = I->getParent()->end();
2507 MachineInstr &MemMI = *I;
2508 MachineBasicBlock::iterator MBBI = I;
2509
2510 Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
2511 int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() *
2512 TII->getMemScale(MemMI);
2513
2514 // Scan forward looking for post-index opportunities. Updating instructions
2515 // can't be formed if the memory instruction doesn't have the offset we're
2516 // looking for.
2517 if (MIUnscaledOffset != UnscaledOffset)
2518 return E;
2519
2520 // If the base register overlaps a source/destination register, we can't
2521 // merge the update. This does not apply to tag store instructions which
2522 // ignore the address part of the source register.
2523 // Nor does it apply to STGPi which, unlike normal stores, does not have
2524 // unpredictable behavior in this case and always performs the writeback
2525 // after reading the source register value.
2526 if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
2527 bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
2528 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
2529 Register DestReg = getLdStRegOp(MemMI, i).getReg();
2530 if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
2531 return E;
2532 }
2533 }
2534
2535 // Track which register units have been modified and used between the first
2536 // insn (inclusive) and the second insn.
2537 ModifiedRegUnits.clear();
2538 UsedRegUnits.clear();
2539 MBBI = next_nodbg(MBBI, E);
2540
2541 // We can't post-increment the stack pointer if any instruction between
2542 // the memory access (I) and the increment (MBBI) can access the memory
2543 // region defined by [SP, MBBI].
2544 const bool BaseRegSP = BaseReg == AArch64::SP;
2545 if (BaseRegSP && needsWinCFI(I->getMF())) {
2546 // FIXME: For now, we always block the optimization over SP in windows
2547 // targets as it requires to adjust the unwind/debug info, messing up
2548 // the unwind info can actually cause a miscompile.
2549 return E;
2550 }
2551
2552 unsigned Count = 0;
2553 MachineBasicBlock *CurMBB = I->getParent();
2554 // The choice of the next block to visit is based on live-ins.
2555 bool VisitSucc = CurMBB->getParent()->getRegInfo().tracksLiveness();
2556
2557 while (true) {
2558 for (MachineBasicBlock::iterator CurEnd = CurMBB->end();
2559 MBBI != CurEnd && Count < Limit; MBBI = next_nodbg(MBBI, CurEnd)) {
2560 MachineInstr &MI = *MBBI;
2561
2562 // Don't count transient instructions towards the search limit since there
2563 // may be different numbers of them if e.g. debug information is present.
2564 if (!MI.isTransient())
2565 ++Count;
2566
2567 // If we found a match, return it.
2568 if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
2569 return MBBI;
2570
2571 // Update the status of what the instruction clobbered and used.
2572 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
2573 TRI);
2574
2575 // Otherwise, if the base register is used or modified, we have no match,
2576 // so return early. If we are optimizing SP, do not allow instructions
2577 // that may load or store in between the load and the optimized value
2578 // update.
2579 if (!ModifiedRegUnits.available(BaseReg) ||
2580 !UsedRegUnits.available(BaseReg) ||
2581 (BaseRegSP && MBBI->mayLoadOrStore()))
2582 return E;
2583 }
2584
2585 if (!VisitSucc || Limit <= Count)
2586 break;
2587
2588 // Try to walk down to a successor along a control-flow path without side
2589 // entries, such that BaseReg is live along it but not at its exits.
2590 MachineBasicBlock *SuccToVisit = nullptr;
2591 unsigned LiveSuccCount = 0;
2592 for (MachineBasicBlock *Succ : CurMBB->successors()) {
2593 for (MCRegAliasIterator AI(BaseReg, TRI, true); AI.isValid(); ++AI) {
2594 if (Succ->isLiveIn(*AI)) {
2595 if (LiveSuccCount++)
2596 return E;
2597 if (Succ->pred_size() == 1)
2598 SuccToVisit = Succ;
2599 break;
2600 }
2601 }
2602 }
2603 if (!SuccToVisit)
2604 break;
2605 CurMBB = SuccToVisit;
2606 MBBI = CurMBB->begin();
2607 }
2608
2609 return E;
2610}
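// Illustrative note (not part of the upstream source): when liveness is
// tracked, the forward scan may continue into a single-predecessor successor
// block in which the base register is live-in, so the 'add x20, x20, #32'
// that gets folded into 'ldr x0, [x20], #32' may live in the fall-through
// block rather than in the block of the load itself.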
2611
2612MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
2613 MachineBasicBlock::iterator I, unsigned Limit, bool &MergeEither) {
2614 MachineBasicBlock::iterator B = I->getParent()->begin();
2615 MachineBasicBlock::iterator E = I->getParent()->end();
2616 MachineInstr &MemMI = *I;
2617 MachineBasicBlock::iterator MBBI = I;
2618 MachineFunction &MF = *MemMI.getMF();
2619
2620 Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
2621 int Offset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm();
2622
2623 bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
2624 Register DestReg[] = {getLdStRegOp(MemMI, 0).getReg(),
2625 IsPairedInsn ? getLdStRegOp(MemMI, 1).getReg()
2626 : AArch64::NoRegister};
2627
2628 // If the load/store is the first instruction in the block, there's obviously
2629 // not any matching update. Ditto if the memory offset isn't zero.
2630 if (MBBI == B || Offset != 0)
2631 return E;
2632 // If the base register overlaps a destination register, we can't
2633 // merge the update.
2634 if (!isTagStore(MemMI)) {
2635 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i)
2636 if (DestReg[i] == BaseReg || TRI->isSubRegister(BaseReg, DestReg[i]))
2637 return E;
2638 }
2639
2640 const bool BaseRegSP = BaseReg == AArch64::SP;
2641 if (BaseRegSP && needsWinCFI(I->getMF())) {
2642 // FIXME: For now, we always block the optimization over SP in windows
2643 // targets as it requires to adjust the unwind/debug info, messing up
2644 // the unwind info can actually cause a miscompile.
2645 return E;
2646 }
2647
2648 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
2649 unsigned RedZoneSize =
2650 Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());
2651
2652 // Track which register units have been modified and used between the first
2653 // insn (inclusive) and the second insn.
2654 ModifiedRegUnits.clear();
2655 UsedRegUnits.clear();
2656 unsigned Count = 0;
2657 bool MemAccessBeforeSPPreInc = false;
2658 MergeEither = true;
2659 do {
2660 MBBI = prev_nodbg(MBBI, B);
2661 MachineInstr &MI = *MBBI;
2662
2663 // Don't count transient instructions towards the search limit since there
2664 // may be different numbers of them if e.g. debug information is present.
2665 if (!MI.isTransient())
2666 ++Count;
2667
2668 // If we found a match, return it.
2669 if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
2670 // Check that the update value is within our red zone limit (which may be
2671 // zero).
2672 if (MemAccessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)
2673 return E;
2674 return MBBI;
2675 }
2676
2677 // Update the status of what the instruction clobbered and used.
2678 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2679
2680 // Otherwise, if the base register is used or modified, we have no match, so
2681 // return early.
2682 if (!ModifiedRegUnits.available(BaseReg) ||
2683 !UsedRegUnits.available(BaseReg))
2684 return E;
2685
2686 // If we have a destination register (i.e. a load instruction) and a
2687 // destination register is used or modified, then we can only merge forward,
2688 // i.e. the combined instruction is put in the place of the memory
2689 // instruction. Same applies if we see a memory access or side effects.
2690 if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects() ||
2691 (DestReg[0] != AArch64::NoRegister &&
2692 !(ModifiedRegUnits.available(DestReg[0]) &&
2693 UsedRegUnits.available(DestReg[0]))) ||
2694 (DestReg[1] != AArch64::NoRegister &&
2695 !(ModifiedRegUnits.available(DestReg[1]) &&
2696 UsedRegUnits.available(DestReg[1]))))
2697 MergeEither = false;
2698
2699 // Keep track of whether there is a memory access before an SP pre-increment;
2700 // in that case we need to validate later that the update amount respects the
2701 // red zone.
2702 if (BaseRegSP && MBBI->mayLoadOrStore())
2703 MemAccessBeforeSPPreInc = true;
2704 } while (MBBI != B && Count < Limit);
2705 return E;
2706}
2707
2708MachineBasicBlock::iterator
2709AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
2710 MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) {
2711 MachineBasicBlock::iterator B = I->getParent()->begin();
2712 MachineBasicBlock::iterator E = I->getParent()->end();
2713 MachineInstr &MemMI = *I;
2714 MachineBasicBlock::iterator MBBI = I;
2715
2716 // If the load is the first instruction in the block, there's obviously
2717 // not any matching load or store.
2718 if (MBBI == B)
2719 return E;
2720
2721 // Make sure the IndexReg is killed and the shift amount is zero.
2722 // TODO: Relax this restriction to allow extends; keep the processing simple for now.
2723 if (!AArch64InstrInfo::getLdStOffsetOp(MemMI).isKill() ||
2724 !AArch64InstrInfo::getLdStAmountOp(MemMI).isImm() ||
2725 (AArch64InstrInfo::getLdStAmountOp(MemMI).getImm() != 0))
2726 return E;
2727
2728 Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();
2729
2730 // Track which register units have been modified and used between the first
2731 // insn (inclusive) and the second insn.
2732 ModifiedRegUnits.clear();
2733 UsedRegUnits.clear();
2734 unsigned Count = 0;
2735 do {
2736 MBBI = prev_nodbg(MBBI, B);
2737 MachineInstr &MI = *MBBI;
2738
2739 // Don't count transient instructions towards the search limit since there
2740 // may be different numbers of them if e.g. debug information is present.
2741 if (!MI.isTransient())
2742 ++Count;
2743
2744 // If we found a match, return it.
2745 if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) {
2746 return MBBI;
2747 }
2748
2749 // Update the status of what the instruction clobbered and used.
2750 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2751
2752 // Otherwise, if the index register is used or modified, we have no match,
2753 // so return early.
2754 if (!ModifiedRegUnits.available(IndexReg) ||
2755 !UsedRegUnits.available(IndexReg))
2756 return E;
2757
2758 } while (MBBI != B && Count < Limit);
2759 return E;
2760}
2761
2762bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
2763 MachineBasicBlock::iterator &MBBI) {
2764 MachineInstr &MI = *MBBI;
2765 // If this is a volatile load, don't mess with it.
2766 if (MI.hasOrderedMemoryRef())
2767 return false;
2768
2769 if (needsWinCFI(MI.getMF()) && MI.getFlag(MachineInstr::FrameDestroy))
2770 return false;
2771
2772 // Make sure this is a reg+imm.
2773 // FIXME: It is possible to extend it to handle reg+reg cases.
2774 if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
2775 return false;
2776
2777 // Look backward up to LdStLimit instructions.
2778 MachineBasicBlock::iterator StoreI;
2779 if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
2780 ++NumLoadsFromStoresPromoted;
2781 // Promote the load. Keeping the iterator straight is a
2782 // pain, so we let the merge routine tell us what the next instruction
2783 // is after it's done mucking about.
2784 MBBI = promoteLoadFromStore(MBBI, StoreI);
2785 return true;
2786 }
2787 return false;
2788}
2789
2790// Merge adjacent zero stores into a wider store.
2791bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
2792 MachineBasicBlock::iterator &MBBI) {
2793 assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
2794 MachineInstr &MI = *MBBI;
2795 MachineBasicBlock::iterator E = MI.getParent()->end();
2796
2797 if (!TII->isCandidateToMergeOrPair(MI))
2798 return false;
2799
2800 // Look ahead up to LdStLimit instructions for a mergeable instruction.
2801 LdStPairFlags Flags;
2802 MachineBasicBlock::iterator MergeMI =
2803 findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
2804 if (MergeMI != E) {
2805 ++NumZeroStoresPromoted;
2806
2807 // Keeping the iterator straight is a pain, so we let the merge routine tell
2808 // us what the next instruction is after it's done mucking about.
2809 MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
2810 return true;
2811 }
2812 return false;
2813}
2814
2815// Find loads and stores that can be merged into a single load or store pair
2816// instruction.
2817bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
2818 MachineInstr &MI = *MBBI;
2819 MachineBasicBlock::iterator E = MI.getParent()->end();
2820
2821 if (!TII->isCandidateToMergeOrPair(MI))
2822 return false;
2823
2824 // If disable-ldp feature is opted, do not emit ldp.
2825 if (MI.mayLoad() && Subtarget->hasDisableLdp())
2826 return false;
2827
2828 // If disable-stp feature is opted, do not emit stp.
2829 if (MI.mayStore() && Subtarget->hasDisableStp())
2830 return false;
2831
2832 // Early exit if the offset is not possible to match. (6 bits of positive
2833 // range, plus allow an extra one in case we find a later insn that matches
2834 // with Offset-1)
2835 bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
2836 int Offset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
2837 int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
2838 // Allow one more for offset.
2839 if (Offset > 0)
2840 Offset -= OffsetStride;
2841 if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
2842 return false;
2843
2844 // Look ahead up to LdStLimit instructions for a pairable instruction.
2845 LdStPairFlags Flags;
2846 MachineBasicBlock::iterator Paired =
2847 findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
2848 if (Paired != E) {
2849 // Keeping the iterator straight is a pain, so we let the merge routine tell
2850 // us what the next instruction is after it's done mucking about.
2851 auto Prev = std::prev(MBBI);
2852
2853 // Fetch the memoperand of the load/store that is a candidate for
2854 // combination.
2855 MachineMemOperand *MemOp =
2856 MI.memoperands_empty() ? nullptr : MI.memoperands().front();
2857
2858 // If a load/store arrives and ldp/stp-aligned-only feature is opted, check
2859 // that the alignment of the source pointer is at least double the alignment
2860 // of the type.
2861 if ((MI.mayLoad() && Subtarget->hasLdpAlignedOnly()) ||
2862 (MI.mayStore() && Subtarget->hasStpAlignedOnly())) {
2863 // If there is no size/align information, cancel the transformation.
2864 if (!MemOp || !MemOp->getMemoryType().isValid()) {
2865 NumFailedAlignmentCheck++;
2866 return false;
2867 }
2868
2869 // Get the needed alignments to check them if
2870 // ldp-aligned-only/stp-aligned-only features are opted.
2871 uint64_t MemAlignment = MemOp->getAlign().value();
2872 uint64_t TypeAlignment =
2873 Align(MemOp->getSize().getValue().getKnownMinValue()).value();
2874
2875 if (MemAlignment < 2 * TypeAlignment) {
2876 NumFailedAlignmentCheck++;
2877 return false;
2878 }
2879 }
2880
2881 ++NumPairCreated;
2882 if (TII->hasUnscaledLdStOffset(MI))
2883 ++NumUnscaledPairCreated;
2884
2885 MBBI = mergePairedInsns(MBBI, Paired, Flags);
2886 // Collect liveness info for instructions between Prev and the new position
2887 // MBBI.
2888 for (auto I = std::next(Prev); I != MBBI; I++)
2889 updateDefinedRegisters(*I, DefinedInBB, TRI);
2890
2891 return true;
2892 }
2893 return false;
2894}
2895
2896bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
2897 (MachineBasicBlock::iterator &MBBI) {
2898 MachineInstr &MI = *MBBI;
2899 MachineBasicBlock::iterator E = MI.getParent()->end();
2900 MachineBasicBlock::iterator Update;
2901
2902 // Look forward to try to form a post-index instruction. For example,
2903 // ldr x0, [x20]
2904 // add x20, x20, #32
2905 // merged into:
2906 // ldr x0, [x20], #32
2907 Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
2908 if (Update != E) {
2909 // Merge the update into the ld/st.
2910 if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
2911 /*IsPreIdx=*/false,
2912 /*MergeEither=*/false)) {
2913 MBBI = *NextI;
2914 return true;
2915 }
2916 }
2917
2918 // Don't know how to handle unscaled pre/post-index versions below, so bail.
2919 if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
2920 return false;
2921
2922 // Look back to try to find a pre-index instruction. For example,
2923 // add x0, x0, #8
2924 // ldr x1, [x0]
2925 // merged into:
2926 // ldr x1, [x0, #8]!
2927 bool MergeEither;
2928 Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit, MergeEither);
2929 if (Update != E) {
2930 // Merge the update into the ld/st.
2931 if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/true,
2932 /*IsPreIdx=*/true, MergeEither)) {
2933 MBBI = *NextI;
2934 return true;
2935 }
2936 }
2937
2938 // The immediate in the load/store is scaled by the size of the memory
2939 // operation. The immediate in the add we're looking for,
2940 // however, is not, so adjust here.
2941 int UnscaledOffset =
2942 AArch64InstrInfo::getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);
2943
2944 // Look forward to try to find a pre-index instruction. For example,
2945 // ldr x1, [x0, #64]
2946 // add x0, x0, #64
2947 // merged into:
2948 // ldr x1, [x0, #64]!
2949 Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
2950 if (Update != E) {
2951 // Merge the update into the ld/st.
2952 if (auto NextI = mergeUpdateInsn(MBBI, Update, /*IsForward=*/false,
2953 /*IsPreIdx=*/true,
2954 /*MergeEither=*/false)) {
2955 MBBI = *NextI;
2956 return true;
2957 }
2958 }
2959
2960 return false;
2961}
2962
2963bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI,
2964 int Scale) {
2965 MachineInstr &MI = *MBBI;
2966 MachineBasicBlock::iterator E = MI.getParent()->end();
2967 MachineBasicBlock::iterator Update;
2968
2969 // Don't know how to handle unscaled pre/post-index versions below, so bail.
2970 if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
2971 return false;
2972
2973 // Look back to try to find a const offset for index LdSt instruction. For
2974 // example,
2975 // mov x8, #LargeImm ; = a * (1<<12) + imm12
2976 // ldr x1, [x0, x8]
2977 // merged into:
2978 // add x8, x0, a * (1<<12)
2979 // ldr x1, [x8, imm12]
2980 unsigned Offset;
2981 Update = findMatchingConstOffsetBackward(MBBI, LdStConstLimit, Offset);
2982 if (Update != E && (Offset & (Scale - 1)) == 0) {
2983 // Merge the imm12 into the ld/st.
2984 MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale);
2985 return true;
2986 }
2987
2988 return false;
2989}
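// Illustrative example (not part of the upstream source, a 64-bit load with
// Scale == 8 assumed): for Offset = 0x51238,
//   mov x8, #0x51238
//   ldr x1, [x0, x8]
// is rewritten (Low = 0x1238, High = 0x50000) into
//   add x8, x0, #0x50, lsl #12
//   ldr x1, [x8, #0x1238]
// where the load encodes the scaled immediate 0x1238 / 8 == 0x247.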
2990
2991bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
2992 bool EnableNarrowZeroStOpt) {
2993 AArch64FunctionInfo &AFI = *MBB.getParent()->getInfo<AArch64FunctionInfo>();
2994
2995 bool Modified = false;
2996 // Five transformations to do here:
2997 // 1) Find loads that directly read from stores and promote them by
2998 // replacing with mov instructions. If the store is wider than the load,
2999 // the load will be replaced with a bitfield extract.
3000 // e.g.,
3001 // str w1, [x0, #4]
3002 // ldrh w2, [x0, #6]
3003 // ; becomes
3004 // str w1, [x0, #4]
3005 // lsr w2, w1, #16
3006 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
3007 MBBI != E;) {
3008 if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
3009 Modified = true;
3010 else
3011 ++MBBI;
3012 }
3013 // 2) Merge adjacent zero stores into a wider store.
3014 // e.g.,
3015 // strh wzr, [x0]
3016 // strh wzr, [x0, #2]
3017 // ; becomes
3018 // str wzr, [x0]
3019 // e.g.,
3020 // str wzr, [x0]
3021 // str wzr, [x0, #4]
3022 // ; becomes
3023 // str xzr, [x0]
3024 if (EnableNarrowZeroStOpt)
3025 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
3026 MBBI != E;) {
3027 if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
3028 Modified = true;
3029 else
3030 ++MBBI;
3031 }
3032 // 3) Find loads and stores that can be merged into a single load or store
3033 // pair instruction.
3034 // When compiling for SVE 128, also try to combine SVE fill/spill
3035 // instructions into LDP/STP.
3036 // e.g.,
3037 // ldr x0, [x2]
3038 // ldr x1, [x2, #8]
3039 // ; becomes
3040 // ldp x0, x1, [x2]
3041 // e.g.,
3042 // ldr z0, [x2]
3043 // ldr z1, [x2, #1, mul vl]
3044 // ; becomes
3045 // ldp q0, q1, [x2]
3046
3047 if (MBB.getParent()->getRegInfo().tracksLiveness()) {
3048 DefinedInBB.clear();
3049 DefinedInBB.addLiveIns(MBB);
3050 }
3051
3052 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
3053 MBBI != E;) {
3054 // Track currently live registers up to this point, to help with
3055 // searching for a rename register on demand.
3056 updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
3057 if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
3058 Modified = true;
3059 else
3060 ++MBBI;
3061 }
3062 // 4) Find base register updates that can be merged into the load or store
3063 // as a base-reg writeback.
3064 // e.g.,
3065 // ldr x0, [x2]
3066 // add x2, x2, #4
3067 // ; becomes
3068 // ldr x0, [x2], #4
3069 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
3070 MBBI != E;) {
3071 if (isMergeableLdStUpdate(*MBBI, AFI) && tryToMergeLdStUpdate(MBBI))
3072 Modified = true;
3073 else
3074 ++MBBI;
3075 }
3076
3077 // 5) Find a register assigned with a const value that can be combined
3078 // into the load or store. e.g.,
3079 // mov x8, #LargeImm ; = a * (1<<12) + imm12
3080 // ldr x1, [x0, x8]
3081 // ; becomes
3082 // add x8, x0, a * (1<<12)
3083 // ldr x1, [x8, imm12]
3084 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
3085 MBBI != E;) {
3086 int Scale;
3087 if (isMergeableIndexLdSt(*MBBI, Scale) && tryToMergeIndexLdSt(MBBI, Scale))
3088 Modified = true;
3089 else
3090 ++MBBI;
3091 }
3092
3093 return Modified;
3094}
3095
3096bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
3097 if (skipFunction(Fn.getFunction()))
3098 return false;
3099
3100 Subtarget = &Fn.getSubtarget<AArch64Subtarget>();
3101 TII = Subtarget->getInstrInfo();
3102 TRI = Subtarget->getRegisterInfo();
3103 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
3104
3105 // Resize the modified and used register unit trackers. We do this once
3106 // per function and then clear the register units each time we optimize a load
3107 // or store.
3108 ModifiedRegUnits.init(*TRI);
3109 UsedRegUnits.init(*TRI);
3110 DefinedInBB.init(*TRI);
3111
3112 bool Modified = false;
3113 bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
3114 for (auto &MBB : Fn) {
3115 auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
3116 Modified |= M;
3117 }
3118
3119 return Modified;
3120}
3121
3122// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads and
3123// stores near one another? Note: The pre-RA instruction scheduler already has
3124// hooks to try and schedule pairable loads/stores together to improve pairing
3125// opportunities. Thus, pre-RA pairing pass may not be worth the effort.
3126
3127// FIXME: When pairing store instructions it's very possible for this pass to
3128// hoist a store with a KILL marker above another use (without a KILL marker).
3129// The resulting IR is invalid, but nothing uses the KILL markers after this
3130// pass, so it's never caused a problem in practice.
3131
3132/// createAArch64LoadStoreOptimizationPass - returns an instance of the
3133/// load / store optimization pass.
3134FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
3135 return new AArch64LoadStoreOpt();
3136}
#define Success
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static cl::opt< bool > EnableRenaming("aarch64-load-store-renaming", cl::init(true), cl::Hidden)
static MachineOperand & getLdStRegOp(MachineInstr &MI, unsigned PairedRegOp=0)
static bool isPromotableLoadFromStore(MachineInstr &MI)
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale, int &MinOffset, int &MaxOffset)
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride)
static unsigned getMatchingPairOpcode(unsigned Opc)
static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, LdStPairFlags &Flags, const AArch64InstrInfo *TII)
static std::optional< MCPhysReg > tryToFindRegisterToRename(const MachineFunction &MF, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static bool needsWinCFI(const MachineFunction *MF)
static bool canRenameUntilSecondLoad(MachineInstr &FirstLoad, MachineInstr &SecondLoad, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static std::optional< MCPhysReg > findRenameRegForSameLdStRegPair(std::optional< bool > MaybeCanRename, MachineInstr &FirstMI, MachineInstr &MI, Register Reg, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
static cl::opt< unsigned > LdStLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden)
static bool canRenameMOP(const MachineOperand &MOP, const TargetRegisterInfo *TRI)
static unsigned getPreIndexedOpcode(unsigned Opc)
#define AARCH64_LOAD_STORE_OPT_NAME
static void addDebugSubstitutionsToTable(MachineFunction *MF, unsigned InstrNumToSet, MachineInstr &OriginalInstr, MachineInstr &MergedInstr)
This function will add a new entry into the debugValueSubstitutions table when two instruction have b...
static cl::opt< unsigned > UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden)
static bool isPromotableZeroStoreInst(MachineInstr &MI)
static unsigned getMatchingWideOpcode(unsigned Opc)
static unsigned getMatchingNonSExtOpcode(unsigned Opc, bool *IsValidLdStrOpc=nullptr)
static MachineBasicBlock::iterator maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI)
static bool isTagStore(const MachineInstr &MI)
static unsigned isMatchingStore(MachineInstr &LoadInst, MachineInstr &StoreInst)
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg, const TargetRegisterInfo *TRI, unsigned Limit, std::function< bool(MachineInstr &, bool)> &Fn)
static bool isRewritableImplicitDef(unsigned Opc)
static unsigned getPostIndexedOpcode(unsigned Opc)
static bool isMergeableLdStUpdate(MachineInstr &MI, AArch64FunctionInfo &AFI)
static cl::opt< unsigned > LdStConstLimit("aarch64-load-store-const-scan-limit", cl::init(10), cl::Hidden)
static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst, MachineInstr &StoreInst, const AArch64InstrInfo *TII)
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI)
static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale)
static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units, const TargetRegisterInfo *TRI)
static bool canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween, SmallPtrSetImpl< const TargetRegisterClass * > &RequiredClasses, const TargetRegisterInfo *TRI)
static unsigned getBaseAddressOpcode(unsigned Opc)
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
#define DEBUG_TYPE
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-specific information for each MachineFunction.
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operand of a load/store.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operand of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operand of a load/store.
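A minimal sketch of how the accessors above combine: given two scaled memory instructions, check that they use the same base register and immediately adjacent immediate offsets. The helper name and the simplified adjacency test are illustrative; the pass itself also handles unscaled forms and stride/alignment constraints.
// Sketch only: assumes both instructions are scaled loads/stores of the same
// width, so their offsets count in units of the access size.
static bool addressesAreAdjacent(const MachineInstr &First,
                                 const MachineInstr &Second) {
  const MachineOperand &BaseA = AArch64InstrInfo::getLdStBaseOp(First);
  const MachineOperand &BaseB = AArch64InstrInfo::getLdStBaseOp(Second);
  if (!BaseA.isReg() || !BaseB.isReg() || BaseA.getReg() != BaseB.getReg())
    return false;
  int64_t OffA = AArch64InstrInfo::getLdStOffsetOp(First).getImm();
  int64_t OffB = AArch64InstrInfo::getLdStOffsetOp(Second).getImm();
  return OffB == OffA + 1 || OffA == OffB + 1;
}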
const AArch64RegisterInfo * getRegisterInfo() const override
const AArch64InstrInfo * getInstrInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
unsigned getRedZoneSize(const Function &F) const
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
static bool shouldExecute(unsigned CounterName)
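DebugCounter::shouldExecute is the usual way a DEBUG_COUNTER gates individual transformations so they can be bisected from the command line. A minimal sketch, assuming a counter declared with DEBUG_COUNTER; the counter and function names here are hypothetical.
#include "llvm/Support/DebugCounter.h"

DEBUG_COUNTER(ExampleCounter, "example-counter",
              "Controls which candidate transformations are attempted");

static bool mayAttemptTransformation() {
  // Each query ticks the counter; -debug-counter=example-counter=... can then
  // restrict which candidates are actually transformed.
  return DebugCounter::shouldExecute(ExampleCounter);
}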
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition Function.h:681
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
A set of register units used to track register liveness.
static void accumulateUsedDefed(const MachineInstr &MI, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI)
For a machine instruction MI, adds all register units used in UsedRegUnits and defined or clobbered in ModifiedRegUnits.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
void removeReg(MCRegister Reg)
Removes all register units covered by physical register Reg.
LLVM_ABI void addLiveIns(const MachineBasicBlock &MBB)
Adds registers living into block MBB.
void clear()
Clears the set.
LLVM_ABI void accumulate(const MachineInstr &MI)
Adds all register units used, defined or clobbered in MI.
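A minimal sketch of the typical LiveRegUnits pattern using only the members listed above: seed the set with the block's live-ins, accumulate every instruction down to a given point, then ask whether a candidate physical register is still completely untouched. This is a conservative approximation; the helper name is illustrative.
#include "llvm/CodeGen/LiveRegUnits.h"

static bool isPhysRegFreeAbove(MachineInstr &MI, MCPhysReg Candidate,
                               const TargetRegisterInfo *TRI) {
  MachineBasicBlock &MBB = *MI.getParent();
  LiveRegUnits Used;
  Used.init(*TRI);
  Used.addLiveIns(MBB);              // registers live on entry to the block
  for (MachineInstr &Cur : MBB) {
    if (&Cur == &MI)
      break;
    Used.accumulate(Cur);            // add uses, defs and clobbers of Cur
  }
  return Used.available(Candidate);  // true if no unit of Candidate was touched
}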
bool usesWindowsCFI() const
Definition MCAsmInfo.h:652
OpType getOperation() const
Definition MCDwarf.h:720
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution from one <instr,operand> value to a different, new value.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
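A minimal sketch of the debug-value bookkeeping implied by makeDebugValueSubstitution: when an instruction that carries an instruction number is replaced, its old <instr,operand> pair is redirected to the corresponding operand of the new instruction. The operand index 0 and the helper name are illustrative; real code picks the def operand that actually corresponds.
static void recordDebugSubstitution(MachineFunction &MF,
                                    MachineInstr &Original,
                                    MachineInstr &Replacement) {
  unsigned OldNum = Original.peekDebugInstrNum();
  if (!OldNum)
    return;                                        // no debug users of the old def
  unsigned NewNum = Replacement.getDebugInstrNum(); // allocates a number if needed
  MF.makeDebugValueSubstitution({OldNum, 0u}, {NewNum, 0u});
}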
const MachineInstrBuilder & cloneMergedMemRefs(ArrayRef< const MachineInstr * > OtherMIs) const
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
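A minimal sketch of how the MachineInstrBuilder interface above is typically used to emit a replacement instruction: copy operands from the originals, merge their memory operands so later alias queries stay precise, and carry the instruction flags over. The opcode choice, operand order, and helper name are simplified and illustrative.
static MachineInstr *emitMergedInstr(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator InsertPt,
                                     MachineInstr &First, MachineInstr &Second,
                                     unsigned NewOpc,
                                     const TargetInstrInfo *TII) {
  return BuildMI(MBB, InsertPt, First.getDebugLoc(), TII->get(NewOpc))
      .add(First.getOperand(0))                   // first transfer register
      .add(Second.getOperand(0))                  // second transfer register
      .add(AArch64InstrInfo::getLdStBaseOp(First))
      .addImm(AArch64InstrInfo::getLdStOffsetOp(First).getImm())
      .cloneMergedMemRefs({&First, &Second})      // keep both memory operands
      .setMIFlags(First.mergeFlagsWith(Second))
      .getInstr();
}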
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
LLVM_ABI bool mayAlias(BatchAAResults *AA, const MachineInstr &Other, bool UseTBAA) const
Returns true if this instruction's memory access aliases the memory access of Other.
unsigned peekDebugInstrNum() const
Examine the instruction number of this MachineInstr.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
mop_range operands()
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the information describing the memory reference is not available.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
LLVM_ABI void dump() const
LLVM_ABI unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
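A minimal sketch of the memory-safety questions the queries above answer before a load or store may be moved past other memory instructions; the helper and parameter names are illustrative, and the real pass also bounds the scan and tracks register liveness.
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"

static bool isSafeToMovePast(MachineInstr &MI,
                             SmallVectorImpl<MachineInstr *> &MemInsnsInBetween,
                             BatchAAResults &BatchAA) {
  // Volatile/atomic accesses, or accesses whose memory info is unknown,
  // block any reordering.
  if (MI.hasOrderedMemoryRef())
    return false;
  if (MI.mayLoad()) {
    // A load only conflicts with intervening stores that may alias it.
    return llvm::none_of(MemInsnsInBetween, [&](MachineInstr *Other) {
      return Other->mayStore() &&
             MI.mayAlias(&BatchAA, *Other, /*UseTBAA=*/false);
    });
  }
  // A store conflicts with any intervening memory access that may alias it.
  return llvm::none_of(MemInsnsInBetween, [&](MachineInstr *Other) {
    return MI.mayAlias(&BatchAA, *Other, /*UseTBAA=*/false);
  });
}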
MachineOperand class - Representation of each machine instruction operand.
void setImplicit(bool Val=true)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
LLVM_ABI bool isRenamable() const
isRenamable - Returns true if this register may be renamed, i.e.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
bool isEarlyClobber() const
Register getReg() const
getReg - Returns the register number.
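A minimal sketch of the operand rewrite that register renaming performs, built from the MachineOperand members above: every renamable register operand overlapping the old register is redirected to the replacement. Sub-register mapping is deliberately omitted, and the helper name is illustrative.
static void rewriteRegisterUses(MachineInstr &MI, MCPhysReg OldReg,
                                MCPhysReg NewReg,
                                const TargetRegisterInfo *TRI) {
  for (MachineOperand &MOP : MI.operands()) {
    if (!MOP.isReg() || !MOP.getReg().isValid())
      continue;
    if (!TRI->regsOverlap(MOP.getReg(), OldReg))
      continue;
    if (!MOP.isRenamable())
      continue;                  // constrained operands must keep their register
    MOP.setReg(NewReg);
  }
}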
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
void dump() const
Definition Pass.cpp:146
Wrapper class representing virtual and physical registers.
Definition Register.h:19
A templated base class for SmallPtrSet which provides the typesafe interface that is common across all small sizes.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
self_iterator getIterator()
Definition ilist_node.h:123
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount; shifter: 000 ==> lsl, 001 ==> lsr, 010 ==> asr, 011 ==> ror, 111 ==> msl.
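A small, self-contained example of the shifter-immediate helpers above: encode an LSL #2 (the shift an index register needs for a 4-byte access) and decode the amount back out.
#include "MCTargetDesc/AArch64AddressingModes.h"

static unsigned shifterImmRoundTrip() {
  unsigned Imm = AArch64_AM::getShifterImm(AArch64_AM::LSL, /*Imm=*/2);
  return AArch64_AM::getShiftValue(Imm);   // yields 2 again
}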
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
initializer< Ty > init(const Ty &Val)
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< filter_iterator< ConstMIBundleOperands, bool(*)(const MachineOperand &)> > phys_regs_and_masks(const MachineInstr &MI)
Returns an iterator range over all physical register and mask operands for MI and bundled instructions.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionPass * createAArch64LoadStoreOptimizationPass()
createAArch64LoadStoreOptimizationPass - returns an instance of the load / store optimization pass.
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached, skipping any debug instructions.
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
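A minimal sketch of the bounded, debug-skipping backward walk that prev_nodbg enables, the same shape of scan that options such as aarch64-update-scan-limit bound; the helper name and visitor interface are illustrative.
#include <functional>

static bool walkBackwards(MachineInstr &MI, unsigned Limit,
                          const std::function<bool(MachineInstr &)> &Visit) {
  MachineBasicBlock &MBB = *MI.getParent();
  MachineBasicBlock::iterator Begin = MBB.begin(), It = MI.getIterator();
  for (unsigned Count = 0; It != Begin && Count < Limit; ++Count) {
    It = prev_nodbg(It, Begin);     // step over DBG_VALUE-style instructions
    if (Visit(*It))
      return true;                  // the visitor found what it was scanning for
  }
  return false;
}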
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.