LLVM 18.0.0git
HexagonVLIWPacketizer.cpp
Go to the documentation of this file.
1//===- HexagonPacketizer.cpp - VLIW packetizer ----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements a simple VLIW packetizer using DFA. The packetizer works on
10// machine basic blocks. For each instruction I in BB, the packetizer consults
11// the DFA to see if machine resources are available to execute I. If so, the
12// packetizer checks if I depends on any instruction J in the current packet.
13// If no dependency is found, I is added to current packet and machine resource
14// is marked as taken. If any dependency is found, a target API call is made to
15// prune the dependence.
16//
17//===----------------------------------------------------------------------===//
18
20#include "Hexagon.h"
21#include "HexagonInstrInfo.h"
22#include "HexagonRegisterInfo.h"
23#include "HexagonSubtarget.h"
24#include "llvm/ADT/BitVector.h"
25#include "llvm/ADT/DenseSet.h"
26#include "llvm/ADT/STLExtras.h"
42#include "llvm/IR/DebugLoc.h"
44#include "llvm/MC/MCInstrDesc.h"
45#include "llvm/Pass.h"
47#include "llvm/Support/Debug.h"
50#include <cassert>
51#include <cstdint>
52#include <iterator>
53
54using namespace llvm;
55
56#define DEBUG_TYPE "packets"
57
58static cl::opt<bool>
59 DisablePacketizer("disable-packetizer", cl::Hidden,
60 cl::desc("Disable Hexagon packetizer pass"));
61
62static cl::opt<bool> Slot1Store("slot1-store-slot0-load", cl::Hidden,
63 cl::init(true),
64 cl::desc("Allow slot1 store and slot0 load"));
65
67 "hexagon-packetize-volatiles", cl::Hidden, cl::init(true),
68 cl::desc("Allow non-solo packetization of volatile memory references"));
69
70static cl::opt<bool>
72 cl::desc("Generate all instruction with TC"));
73
74static cl::opt<bool>
75 DisableVecDblNVStores("disable-vecdbl-nv-stores", cl::Hidden,
76 cl::desc("Disable vector double new-value-stores"));
77
79
80namespace llvm {
81
84
85} // end namespace llvm
86
87namespace {
88
89 class HexagonPacketizer : public MachineFunctionPass {
90 public:
91 static char ID;
92
93 HexagonPacketizer(bool Min = false)
94 : MachineFunctionPass(ID), Minimal(Min) {}
95
96 void getAnalysisUsage(AnalysisUsage &AU) const override {
97 AU.setPreservesCFG();
105 }
106
107 StringRef getPassName() const override { return "Hexagon Packetizer"; }
108 bool runOnMachineFunction(MachineFunction &Fn) override;
109
112 MachineFunctionProperties::Property::NoVRegs);
113 }
114
115 private:
116 const HexagonInstrInfo *HII = nullptr;
117 const HexagonRegisterInfo *HRI = nullptr;
118 const bool Minimal = false;
119 };
120
121} // end anonymous namespace
122
123char HexagonPacketizer::ID = 0;
124
125INITIALIZE_PASS_BEGIN(HexagonPacketizer, "hexagon-packetizer",
126 "Hexagon Packetizer", false, false)
131INITIALIZE_PASS_END(HexagonPacketizer, "hexagon-packetizer",
132 "Hexagon Packetizer", false, false)
133
135 MachineLoopInfo &MLI, AAResults *AA,
136 const MachineBranchProbabilityInfo *MBPI, bool Minimal)
137 : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI),
138 Minimal(Minimal) {
139 HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
140 HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
141
142 addMutation(std::make_unique<HexagonSubtarget::UsrOverflowMutation>());
143 addMutation(std::make_unique<HexagonSubtarget::HVXMemLatencyMutation>());
144 addMutation(std::make_unique<HexagonSubtarget::BankConflictMutation>());
145}
146
147// Check if FirstI modifies a register that SecondI reads.
148static bool hasWriteToReadDep(const MachineInstr &FirstI,
149 const MachineInstr &SecondI,
150 const TargetRegisterInfo *TRI) {
151 for (auto &MO : FirstI.operands()) {
152 if (!MO.isReg() || !MO.isDef())
153 continue;
154 Register R = MO.getReg();
155 if (SecondI.readsRegister(R, TRI))
156 return true;
157 }
158 return false;
159}
160
161
163 MachineBasicBlock::iterator BundleIt, bool Before) {
165 if (Before)
166 InsertPt = BundleIt.getInstrIterator();
167 else
168 InsertPt = std::next(BundleIt).getInstrIterator();
169
170 MachineBasicBlock &B = *MI.getParent();
171 // The instruction should at least be bundled with the preceding instruction
172 // (there will always be one, i.e. BUNDLE, if nothing else).
173 assert(MI.isBundledWithPred());
174 if (MI.isBundledWithSucc()) {
175 MI.clearFlag(MachineInstr::BundledSucc);
176 MI.clearFlag(MachineInstr::BundledPred);
177 } else {
178 // If it's not bundled with the successor (i.e. it is the last one
179 // in the bundle), then we can simply unbundle it from the predecessor,
180 // which will take care of updating the predecessor's flag.
181 MI.unbundleFromPred();
182 }
183 B.splice(InsertPt, &B, MI.getIterator());
184
185 // Get the size of the bundle without asserting.
188 unsigned Size = 0;
189 for (++I; I != E && I->isBundledWithPred(); ++I)
190 ++Size;
191
192 // If there are still two or more instructions, then there is nothing
193 // else to be done.
194 if (Size > 1)
195 return BundleIt;
196
197 // Otherwise, extract the single instruction out and delete the bundle.
198 MachineBasicBlock::iterator NextIt = std::next(BundleIt);
199 MachineInstr &SingleI = *BundleIt->getNextNode();
200 SingleI.unbundleFromPred();
201 assert(!SingleI.isBundledWithSucc());
202 BundleIt->eraseFromParent();
203 return NextIt;
204}
205
206bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
207 // FIXME: This pass causes verification failures.
208 MF.getProperties().set(
209 MachineFunctionProperties::Property::FailsVerification);
210
211 auto &HST = MF.getSubtarget<HexagonSubtarget>();
212 HII = HST.getInstrInfo();
213 HRI = HST.getRegisterInfo();
214 auto &MLI = getAnalysis<MachineLoopInfo>();
215 auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
216 auto *MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
217
219 HII->genAllInsnTimingClasses(MF);
220
221 // Instantiate the packetizer.
222 bool MinOnly = Minimal || DisablePacketizer || !HST.usePackets() ||
223 skipFunction(MF.getFunction());
224 HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI, MinOnly);
225
226 // DFA state table should not be empty.
227 assert(Packetizer.getResourceTracker() && "Empty DFA table!");
228
229 // Loop over all basic blocks and remove KILL pseudo-instructions
230 // These instructions confuse the dependence analysis. Consider:
231 // D0 = ... (Insn 0)
232 // R0 = KILL R0, D0 (Insn 1)
233 // R0 = ... (Insn 2)
234 // Here, Insn 1 will result in the dependence graph not emitting an output
235 // dependence between Insn 0 and Insn 2. This can lead to incorrect
236 // packetization
237 for (MachineBasicBlock &MB : MF) {
239 if (MI.isKill())
240 MB.erase(&MI);
241 }
242
243 // TinyCore with Duplexes: Translate to big-instructions.
244 if (HST.isTinyCoreWithDuplex())
245 HII->translateInstrsForDup(MF, true);
246
247 // Loop over all of the basic blocks.
248 for (auto &MB : MF) {
249 auto Begin = MB.begin(), End = MB.end();
250 while (Begin != End) {
251 // Find the first non-boundary starting from the end of the last
252 // scheduling region.
254 while (RB != End && HII->isSchedulingBoundary(*RB, &MB, MF))
255 ++RB;
256 // Find the first boundary starting from the beginning of the new
257 // region.
259 while (RE != End && !HII->isSchedulingBoundary(*RE, &MB, MF))
260 ++RE;
261 // Add the scheduling boundary if it's not block end.
262 if (RE != End)
263 ++RE;
264 // If RB == End, then RE == End.
265 if (RB != End)
266 Packetizer.PacketizeMIs(&MB, RB, RE);
267
268 Begin = RE;
269 }
270 }
271
272 // TinyCore with Duplexes: Translate to tiny-instructions.
273 if (HST.isTinyCoreWithDuplex())
274 HII->translateInstrsForDup(MF, false);
275
276 Packetizer.unpacketizeSoloInstrs(MF);
277 return true;
278}
279
280// Reserve resources for a constant extender. Trigger an assertion if the
281// reservation fails.
284 llvm_unreachable("Resources not available");
285}
286
289}
290
291// Allocate resources (i.e. 4 bytes) for constant extender. If succeeded,
292// return true, otherwise, return false.
294 auto *ExtMI = MF.CreateMachineInstr(HII->get(Hexagon::A4_ext), DebugLoc());
295 bool Avail = ResourceTracker->canReserveResources(*ExtMI);
296 if (Reserve && Avail)
298 MF.deleteMachineInstr(ExtMI);
299 return Avail;
300}
301
303 SDep::Kind DepType, unsigned DepReg) {
304 // Check for LR dependence.
305 if (DepReg == HRI->getRARegister())
306 return true;
307
308 if (HII->isDeallocRet(MI))
309 if (DepReg == HRI->getFrameRegister() || DepReg == HRI->getStackRegister())
310 return true;
311
312 // Call-like instructions can be packetized with preceding instructions
313 // that define registers implicitly used or modified by the call. Explicit
314 // uses are still prohibited, as in the case of indirect calls:
315 // r0 = ...
316 // J2_jumpr r0
317 if (DepType == SDep::Data) {
318 for (const MachineOperand &MO : MI.operands())
319 if (MO.isReg() && MO.getReg() == DepReg && !MO.isImplicit())
320 return true;
321 }
322
323 return false;
324}
325
326static bool isRegDependence(const SDep::Kind DepType) {
327 return DepType == SDep::Data || DepType == SDep::Anti ||
328 DepType == SDep::Output;
329}
330
331static bool isDirectJump(const MachineInstr &MI) {
332 return MI.getOpcode() == Hexagon::J2_jump;
333}
334
335static bool isSchedBarrier(const MachineInstr &MI) {
336 switch (MI.getOpcode()) {
337 case Hexagon::Y2_barrier:
338 return true;
339 }
340 return false;
341}
342
343static bool isControlFlow(const MachineInstr &MI) {
344 return MI.getDesc().isTerminator() || MI.getDesc().isCall();
345}
346
347/// Returns true if the instruction modifies a callee-saved register.
349 const TargetRegisterInfo *TRI) {
350 const MachineFunction &MF = *MI.getParent()->getParent();
351 for (auto *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
352 if (MI.modifiesRegister(*CSR, TRI))
353 return true;
354 return false;
355}
356
357// Returns true if an instruction can be promoted to .new predicate or
358// new-value store.
360 const TargetRegisterClass *NewRC) {
361 // Vector stores can be predicated, and can be new-value stores, but
362 // they cannot be predicated on a .new predicate value.
363 if (NewRC == &Hexagon::PredRegsRegClass) {
364 if (HII->isHVXVec(MI) && MI.mayStore())
365 return false;
366 return HII->isPredicated(MI) && HII->getDotNewPredOp(MI, nullptr) > 0;
367 }
368 // If the class is not PredRegs, it could only apply to new-value stores.
369 return HII->mayBeNewStore(MI);
370}
371
372// Promote an instruction to its .cur form.
373// At this time, we have already made a call to canPromoteToDotCur and made
374// sure that it can *indeed* be promoted.
377 const TargetRegisterClass* RC) {
378 assert(DepType == SDep::Data);
379 int CurOpcode = HII->getDotCurOp(MI);
380 MI.setDesc(HII->get(CurOpcode));
381 return true;
382}
383
385 MachineInstr *MI = nullptr;
386 for (auto *BI : CurrentPacketMIs) {
387 LLVM_DEBUG(dbgs() << "Cleanup packet has "; BI->dump(););
388 if (HII->isDotCurInst(*BI)) {
389 MI = BI;
390 continue;
391 }
392 if (MI) {
393 for (auto &MO : BI->operands())
394 if (MO.isReg() && MO.getReg() == MI->getOperand(0).getReg())
395 return;
396 }
397 }
398 if (!MI)
399 return;
400 // We did not find a use of the CUR, so de-cur it.
401 MI->setDesc(HII->get(HII->getNonDotCurOp(*MI)));
402 LLVM_DEBUG(dbgs() << "Demoted CUR "; MI->dump(););
403}
404
405// Check to see if an instruction can be dot cur.
407 const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
408 const TargetRegisterClass *RC) {
409 if (!HII->isHVXVec(MI))
410 return false;
411 if (!HII->isHVXVec(*MII))
412 return false;
413
414 // Already a dot cur instruction.
415 if (HII->isDotCurInst(MI) && !HII->mayBeCurLoad(MI))
416 return false;
417
418 if (!HII->mayBeCurLoad(MI))
419 return false;
420
421 // The "cur value" cannot come from inline asm.
422 if (PacketSU->getInstr()->isInlineAsm())
423 return false;
424
425 // Make sure candidate instruction uses cur.
426 LLVM_DEBUG(dbgs() << "Can we DOT Cur Vector MI\n"; MI.dump();
427 dbgs() << "in packet\n";);
428 MachineInstr &MJ = *MII;
429 LLVM_DEBUG({
430 dbgs() << "Checking CUR against ";
431 MJ.dump();
432 });
433 Register DestReg = MI.getOperand(0).getReg();
434 bool FoundMatch = false;
435 for (auto &MO : MJ.operands())
436 if (MO.isReg() && MO.getReg() == DestReg)
437 FoundMatch = true;
438 if (!FoundMatch)
439 return false;
440
441 // Check for existing uses of a vector register within the packet which
442 // would be affected by converting a vector load into .cur form.
443 for (auto *BI : CurrentPacketMIs) {
444 LLVM_DEBUG(dbgs() << "packet has "; BI->dump(););
445 if (BI->readsRegister(DepReg, MF.getSubtarget().getRegisterInfo()))
446 return false;
447 }
448
449 LLVM_DEBUG(dbgs() << "Can Dot CUR MI\n"; MI.dump(););
450 // We can convert the opcode into a .cur.
451 return true;
452}
453
454// Promote an instruction to its .new form. At this time, we have already
455// made a call to canPromoteToDotNew and made sure that it can *indeed* be
456// promoted.
459 const TargetRegisterClass* RC) {
460 assert(DepType == SDep::Data);
461 int NewOpcode;
462 if (RC == &Hexagon::PredRegsRegClass)
463 NewOpcode = HII->getDotNewPredOp(MI, MBPI);
464 else
465 NewOpcode = HII->getDotNewOp(MI);
466 MI.setDesc(HII->get(NewOpcode));
467 return true;
468}
469
471 int NewOpcode = HII->getDotOldOp(MI);
472 MI.setDesc(HII->get(NewOpcode));
473 return true;
474}
475
477 unsigned Opc = MI.getOpcode();
478 switch (Opc) {
479 case Hexagon::S2_storerd_io:
480 case Hexagon::S2_storeri_io:
481 case Hexagon::S2_storerh_io:
482 case Hexagon::S2_storerb_io:
483 break;
484 default:
485 llvm_unreachable("Unexpected instruction");
486 }
487 unsigned FrameSize = MF.getFrameInfo().getStackSize();
488 MachineOperand &Off = MI.getOperand(1);
489 int64_t NewOff = Off.getImm() - (FrameSize + HEXAGON_LRFP_SIZE);
490 if (HII->isValidOffset(Opc, NewOff, HRI)) {
491 Off.setImm(NewOff);
492 return true;
493 }
494 return false;
495}
496
498 unsigned Opc = MI.getOpcode();
499 switch (Opc) {
500 case Hexagon::S2_storerd_io:
501 case Hexagon::S2_storeri_io:
502 case Hexagon::S2_storerh_io:
503 case Hexagon::S2_storerb_io:
504 break;
505 default:
506 llvm_unreachable("Unexpected instruction");
507 }
508 unsigned FrameSize = MF.getFrameInfo().getStackSize();
509 MachineOperand &Off = MI.getOperand(1);
510 Off.setImm(Off.getImm() + FrameSize + HEXAGON_LRFP_SIZE);
511}
512
513/// Return true if we can update the offset in MI so that MI and MJ
514/// can be packetized together.
516 assert(SUI->getInstr() && SUJ->getInstr());
517 MachineInstr &MI = *SUI->getInstr();
518 MachineInstr &MJ = *SUJ->getInstr();
519
520 unsigned BPI, OPI;
521 if (!HII->getBaseAndOffsetPosition(MI, BPI, OPI))
522 return false;
523 unsigned BPJ, OPJ;
524 if (!HII->getBaseAndOffsetPosition(MJ, BPJ, OPJ))
525 return false;
526 Register Reg = MI.getOperand(BPI).getReg();
527 if (Reg != MJ.getOperand(BPJ).getReg())
528 return false;
529 // Make sure that the dependences do not restrict adding MI to the packet.
530 // That is, ignore anti dependences, and make sure the only data dependence
531 // involves the specific register.
532 for (const auto &PI : SUI->Preds)
533 if (PI.getKind() != SDep::Anti &&
534 (PI.getKind() != SDep::Data || PI.getReg() != Reg))
535 return false;
536 int Incr;
537 if (!HII->getIncrementValue(MJ, Incr))
538 return false;
539
540 int64_t Offset = MI.getOperand(OPI).getImm();
541 if (!HII->isValidOffset(MI.getOpcode(), Offset+Incr, HRI))
542 return false;
543
544 MI.getOperand(OPI).setImm(Offset + Incr);
545 ChangedOffset = Offset;
546 return true;
547}
548
549/// Undo the changed offset. This is needed if the instruction cannot be
550/// added to the current packet due to a different instruction.
552 unsigned BP, OP;
553 if (!HII->getBaseAndOffsetPosition(MI, BP, OP))
554 llvm_unreachable("Unable to find base and offset operands.");
555 MI.getOperand(OP).setImm(ChangedOffset);
556}
557
563
564/// Returns true if an instruction is predicated on p0 and false if it's
565/// predicated on !p0.
567 const HexagonInstrInfo *HII) {
568 if (!HII->isPredicated(MI))
569 return PK_Unknown;
570 if (HII->isPredicatedTrue(MI))
571 return PK_True;
572 return PK_False;
573}
574
576 const HexagonInstrInfo *HII) {
577 assert(HII->isPostIncrement(MI) && "Not a post increment operation.");
578#ifndef NDEBUG
579 // Post Increment means duplicates. Use dense map to find duplicates in the
580 // list. Caution: Densemap initializes with the minimum of 64 buckets,
581 // whereas there are at most 5 operands in the post increment.
582 DenseSet<unsigned> DefRegsSet;
583 for (auto &MO : MI.operands())
584 if (MO.isReg() && MO.isDef())
585 DefRegsSet.insert(MO.getReg());
586
587 for (auto &MO : MI.operands())
588 if (MO.isReg() && MO.isUse() && DefRegsSet.count(MO.getReg()))
589 return MO;
590#else
591 if (MI.mayLoad()) {
592 const MachineOperand &Op1 = MI.getOperand(1);
593 // The 2nd operand is always the post increment operand in load.
594 assert(Op1.isReg() && "Post increment operand has be to a register.");
595 return Op1;
596 }
597 if (MI.getDesc().mayStore()) {
598 const MachineOperand &Op0 = MI.getOperand(0);
599 // The 1st operand is always the post increment operand in store.
600 assert(Op0.isReg() && "Post increment operand has be to a register.");
601 return Op0;
602 }
603#endif
604 // we should never come here.
605 llvm_unreachable("mayLoad or mayStore not set for Post Increment operation");
606}
607
608// Get the value being stored.
610 // value being stored is always the last operand.
611 return MI.getOperand(MI.getNumOperands()-1);
612}
613
614static bool isLoadAbsSet(const MachineInstr &MI) {
615 unsigned Opc = MI.getOpcode();
616 switch (Opc) {
617 case Hexagon::L4_loadrd_ap:
618 case Hexagon::L4_loadrb_ap:
619 case Hexagon::L4_loadrh_ap:
620 case Hexagon::L4_loadrub_ap:
621 case Hexagon::L4_loadruh_ap:
622 case Hexagon::L4_loadri_ap:
623 return true;
624 }
625 return false;
626}
627
630 return MI.getOperand(1);
631}
632
633// Can be new value store?
634// The following restrictions must be respected when converting a store into
635// a new value store.
636// 1. If an instruction uses auto-increment, its address register cannot
637// be a new-value register. Arch Spec 5.4.2.1
638// 2. If an instruction uses absolute-set addressing mode, its address
639// register cannot be a new-value register. Arch Spec 5.4.2.1.
640// 3. If an instruction produces a 64-bit result, its registers cannot be used
641// as new-value registers. Arch Spec 5.4.2.2.
642// 4. If the instruction that sets the new-value register is conditional, then
643// the instruction that uses the new-value register must also be conditional,
644// and both must always have their predicates evaluate identically.
645// Arch Spec 5.4.2.3.
646// 5. There is an implied restriction that a packet cannot have another store,
647// if there is a new value store in the packet. Corollary: if there is
648// already a store in a packet, there can not be a new value store.
649// Arch Spec: 3.4.4.2
651 const MachineInstr &PacketMI, unsigned DepReg) {
652 // Make sure we are looking at the store, that can be promoted.
653 if (!HII->mayBeNewStore(MI))
654 return false;
655
656 // Make sure there is dependency and can be new value'd.
658 if (Val.isReg() && Val.getReg() != DepReg)
659 return false;
660
661 const MCInstrDesc& MCID = PacketMI.getDesc();
662
663 // First operand is always the result.
664 const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI, MF);
665 // Double regs can not feed into new value store: PRM section: 5.4.2.2.
666 if (PacketRC == &Hexagon::DoubleRegsRegClass)
667 return false;
668
669 // New-value stores are of class NV (slot 0), dual stores require class ST
670 // in slot 0 (PRM 5.5).
671 for (auto *I : CurrentPacketMIs) {
672 SUnit *PacketSU = MIToSUnit.find(I)->second;
673 if (PacketSU->getInstr()->mayStore())
674 return false;
675 }
676
677 // Make sure it's NOT the post increment register that we are going to
678 // new value.
679 if (HII->isPostIncrement(MI) &&
680 getPostIncrementOperand(MI, HII).getReg() == DepReg) {
681 return false;
682 }
683
684 if (HII->isPostIncrement(PacketMI) && PacketMI.mayLoad() &&
685 getPostIncrementOperand(PacketMI, HII).getReg() == DepReg) {
686 // If source is post_inc, or absolute-set addressing, it can not feed
687 // into new value store
688 // r3 = memw(r2++#4)
689 // memw(r30 + #-1404) = r2.new -> can not be new value store
690 // arch spec section: 5.4.2.1.
691 return false;
692 }
693
694 if (isLoadAbsSet(PacketMI) && getAbsSetOperand(PacketMI).getReg() == DepReg)
695 return false;
696
697 // If the source that feeds the store is predicated, new value store must
698 // also be predicated.
699 if (HII->isPredicated(PacketMI)) {
700 if (!HII->isPredicated(MI))
701 return false;
702
703 // Check to make sure that they both will have their predicates
704 // evaluate identically.
705 unsigned predRegNumSrc = 0;
706 unsigned predRegNumDst = 0;
707 const TargetRegisterClass* predRegClass = nullptr;
708
709 // Get predicate register used in the source instruction.
710 for (auto &MO : PacketMI.operands()) {
711 if (!MO.isReg())
712 continue;
713 predRegNumSrc = MO.getReg();
714 predRegClass = HRI->getMinimalPhysRegClass(predRegNumSrc);
715 if (predRegClass == &Hexagon::PredRegsRegClass)
716 break;
717 }
718 assert((predRegClass == &Hexagon::PredRegsRegClass) &&
719 "predicate register not found in a predicated PacketMI instruction");
720
721 // Get predicate register used in new-value store instruction.
722 for (auto &MO : MI.operands()) {
723 if (!MO.isReg())
724 continue;
725 predRegNumDst = MO.getReg();
726 predRegClass = HRI->getMinimalPhysRegClass(predRegNumDst);
727 if (predRegClass == &Hexagon::PredRegsRegClass)
728 break;
729 }
730 assert((predRegClass == &Hexagon::PredRegsRegClass) &&
731 "predicate register not found in a predicated MI instruction");
732
733 // New-value register producer and user (store) need to satisfy these
734 // constraints:
735 // 1) Both instructions should be predicated on the same register.
736 // 2) If producer of the new-value register is .new predicated then store
737 // should also be .new predicated and if producer is not .new predicated
738 // then store should not be .new predicated.
739 // 3) Both new-value register producer and user should have same predicate
740 // sense, i.e, either both should be negated or both should be non-negated.
741 if (predRegNumDst != predRegNumSrc ||
742 HII->isDotNewInst(PacketMI) != HII->isDotNewInst(MI) ||
743 getPredicateSense(MI, HII) != getPredicateSense(PacketMI, HII))
744 return false;
745 }
746
747 // Make sure that other than the new-value register no other store instruction
748 // register has been modified in the same packet. Predicate registers can be
749 // modified but they should not be modified between the producer and the store
750 // instruction as it will make them both conditional on different values.
751 // We already know this to be true for all the instructions before and
752 // including PacketMI. However, we need to perform the check for the
753 // remaining instructions in the packet.
754
755 unsigned StartCheck = 0;
756
757 for (auto *I : CurrentPacketMIs) {
758 SUnit *TempSU = MIToSUnit.find(I)->second;
759 MachineInstr &TempMI = *TempSU->getInstr();
760
761 // Following condition is true for all the instructions until PacketMI is
762 // reached (StartCheck is set to 0 before the for loop).
763 // StartCheck flag is 1 for all the instructions after PacketMI.
764 if (&TempMI != &PacketMI && !StartCheck) // Start processing only after
765 continue; // encountering PacketMI.
766
767 StartCheck = 1;
768 if (&TempMI == &PacketMI) // We don't want to check PacketMI for dependence.
769 continue;
770
771 for (auto &MO : MI.operands())
772 if (MO.isReg() && TempSU->getInstr()->modifiesRegister(MO.getReg(), HRI))
773 return false;
774 }
775
776 // Make sure that for non-POST_INC stores:
777 // 1. The only use of reg is DepReg and no other registers.
778 // This handles base+index registers.
779 // The following store can not be dot new.
780 // Eg. r0 = add(r0, #3)
781 // memw(r1+r0<<#2) = r0
782 if (!HII->isPostIncrement(MI)) {
783 for (unsigned opNum = 0; opNum < MI.getNumOperands()-1; opNum++) {
784 const MachineOperand &MO = MI.getOperand(opNum);
785 if (MO.isReg() && MO.getReg() == DepReg)
786 return false;
787 }
788 }
789
790 // If data definition is because of implicit definition of the register,
791 // do not newify the store. Eg.
792 // %r9 = ZXTH %r12, implicit %d6, implicit-def %r12
793 // S2_storerh_io %r8, 2, killed %r12; mem:ST2[%scevgep343]
794 for (auto &MO : PacketMI.operands()) {
795 if (MO.isRegMask() && MO.clobbersPhysReg(DepReg))
796 return false;
797 if (!MO.isReg() || !MO.isDef() || !MO.isImplicit())
798 continue;
799 Register R = MO.getReg();
800 if (R == DepReg || HRI->isSuperRegister(DepReg, R))
801 return false;
802 }
803
804 // Handle imp-use of super reg case. There is a target independent side
805 // change that should prevent this situation but I am handling it for
806 // just-in-case. For example, we cannot newify R2 in the following case:
807 // %r3 = A2_tfrsi 0;
808 // S2_storeri_io killed %r0, 0, killed %r2, implicit killed %d1;
809 for (auto &MO : MI.operands()) {
810 if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg)
811 return false;
812 }
813
814 // Can be dot new store.
815 return true;
816}
817
818// Can this MI be promoted to either new value store or new value jump.
820 const SUnit *PacketSU, unsigned DepReg,
822 if (!HII->mayBeNewStore(MI))
823 return false;
824
825 // Check to see the store can be new value'ed.
826 MachineInstr &PacketMI = *PacketSU->getInstr();
827 if (canPromoteToNewValueStore(MI, PacketMI, DepReg))
828 return true;
829
830 // Check to see the compare/jump can be new value'ed.
831 // This is done as a pass on its own. Don't need to check it here.
832 return false;
833}
834
835static bool isImplicitDependency(const MachineInstr &I, bool CheckDef,
836 unsigned DepReg) {
837 for (auto &MO : I.operands()) {
838 if (CheckDef && MO.isRegMask() && MO.clobbersPhysReg(DepReg))
839 return true;
840 if (!MO.isReg() || MO.getReg() != DepReg || !MO.isImplicit())
841 continue;
842 if (CheckDef == MO.isDef())
843 return true;
844 }
845 return false;
846}
847
848// Check to see if an instruction can be dot new.
850 const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
851 const TargetRegisterClass* RC) {
852 // Already a dot new instruction.
853 if (HII->isDotNewInst(MI) && !HII->mayBeNewStore(MI))
854 return false;
855
856 if (!isNewifiable(MI, RC))
857 return false;
858
859 const MachineInstr &PI = *PacketSU->getInstr();
860
861 // The "new value" cannot come from inline asm.
862 if (PI.isInlineAsm())
863 return false;
864
865 // IMPLICIT_DEFs won't materialize as real instructions, so .new makes no
866 // sense.
867 if (PI.isImplicitDef())
868 return false;
869
870 // If dependency is through an implicitly defined register, we should not
871 // newify the use.
872 if (isImplicitDependency(PI, true, DepReg) ||
873 isImplicitDependency(MI, false, DepReg))
874 return false;
875
876 const MCInstrDesc& MCID = PI.getDesc();
877 const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI, MF);
878 if (DisableVecDblNVStores && VecRC == &Hexagon::HvxWRRegClass)
879 return false;
880
881 // predicate .new
882 if (RC == &Hexagon::PredRegsRegClass)
883 return HII->predCanBeUsedAsDotNew(PI, DepReg);
884
885 if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI))
886 return false;
887
888 // Create a dot new machine instruction to see if resources can be
889 // allocated. If not, bail out now.
890 int NewOpcode = (RC != &Hexagon::PredRegsRegClass) ? HII->getDotNewOp(MI) :
891 HII->getDotNewPredOp(MI, MBPI);
892 const MCInstrDesc &D = HII->get(NewOpcode);
894 bool ResourcesAvailable = ResourceTracker->canReserveResources(*NewMI);
895 MF.deleteMachineInstr(NewMI);
896 if (!ResourcesAvailable)
897 return false;
898
899 // New Value Store only. New Value Jump generated as a separate pass.
900 if (!canPromoteToNewValue(MI, PacketSU, DepReg, MII))
901 return false;
902
903 return true;
904}
905
906// Go through the packet instructions and search for an anti dependency between
907// them and DepReg from MI. Consider this case:
908// Trying to add
909// a) %r1 = TFRI_cdNotPt %p3, 2
910// to this packet:
911// {
912// b) %p0 = C2_or killed %p3, killed %p0
913// c) %p3 = C2_tfrrp %r23
914// d) %r1 = C2_cmovenewit %p3, 4
915// }
916// The P3 from a) and d) will be complements after
917// a)'s P3 is converted to .new form
918// Anti-dep between c) and b) is irrelevant for this case
920 unsigned DepReg) {
921 SUnit *PacketSUDep = MIToSUnit.find(&MI)->second;
922
923 for (auto *I : CurrentPacketMIs) {
924 // We only care for dependencies to predicated instructions
925 if (!HII->isPredicated(*I))
926 continue;
927
928 // Scheduling Unit for current insn in the packet
929 SUnit *PacketSU = MIToSUnit.find(I)->second;
930
931 // Look at dependencies between current members of the packet and
932 // predicate defining instruction MI. Make sure that dependency is
933 // on the exact register we care about.
934 if (PacketSU->isSucc(PacketSUDep)) {
935 for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
936 auto &Dep = PacketSU->Succs[i];
937 if (Dep.getSUnit() == PacketSUDep && Dep.getKind() == SDep::Anti &&
938 Dep.getReg() == DepReg)
939 return true;
940 }
941 }
942 }
943
944 return false;
945}
946
947/// Gets the predicate register of a predicated instruction.
949 const HexagonInstrInfo *QII) {
950 /// We use the following rule: The first predicate register that is a use is
951 /// the predicate register of a predicated instruction.
952 assert(QII->isPredicated(MI) && "Must be predicated instruction");
953
954 for (auto &Op : MI.operands()) {
955 if (Op.isReg() && Op.getReg() && Op.isUse() &&
956 Hexagon::PredRegsRegClass.contains(Op.getReg()))
957 return Op.getReg();
958 }
959
960 llvm_unreachable("Unknown instruction operand layout");
961 return 0;
962}
963
964// Given two predicated instructions, this function detects whether
965// the predicates are complements.
967 MachineInstr &MI2) {
968 // If we don't know the predicate sense of the instructions bail out early, we
969 // need it later.
970 if (getPredicateSense(MI1, HII) == PK_Unknown ||
971 getPredicateSense(MI2, HII) == PK_Unknown)
972 return false;
973
974 // Scheduling unit for candidate.
975 SUnit *SU = MIToSUnit[&MI1];
976
977 // One corner case deals with the following scenario:
978 // Trying to add
979 // a) %r24 = A2_tfrt %p0, %r25
980 // to this packet:
981 // {
982 // b) %r25 = A2_tfrf %p0, %r24
983 // c) %p0 = C2_cmpeqi %r26, 1
984 // }
985 //
986 // On general check a) and b) are complements, but presence of c) will
987 // convert a) to .new form, and then it is not a complement.
988 // We attempt to detect it by analyzing existing dependencies in the packet.
989
990 // Analyze relationships between all existing members of the packet.
991 // Look for Anti dependency on the same predicate reg as used in the
992 // candidate.
993 for (auto *I : CurrentPacketMIs) {
994 // Scheduling Unit for current insn in the packet.
995 SUnit *PacketSU = MIToSUnit.find(I)->second;
996
997 // If this instruction in the packet is succeeded by the candidate...
998 if (PacketSU->isSucc(SU)) {
999 for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
1000 auto Dep = PacketSU->Succs[i];
1001 // The corner case exists when there is a true data dependency between
1002 // the candidate and one of the current packet members, this dep is on a
1003 // predicate reg, and there already exists an anti dep on the same pred in
1004 // the packet.
1005 if (Dep.getSUnit() == SU && Dep.getKind() == SDep::Data &&
1006 Hexagon::PredRegsRegClass.contains(Dep.getReg())) {
1007 // Here I know that I is predicate setting instruction with true
1008 // data dep to candidate on the register we care about - c) in the
1009 // above example. Now I need to see if there is an anti dependency
1010 // from c) to any other instruction in the same packet on the pred
1011 // reg of interest.
1012 if (restrictingDepExistInPacket(*I, Dep.getReg()))
1013 return false;
1014 }
1015 }
1016 }
1017 }
1018
1019 // If the above case does not apply, check regular complement condition.
1020 // Check that the predicate register is the same and that the predicate
1021 // sense is different. We also need to differentiate .old vs. .new: !p0
1022 // is not complementary to p0.new.
1023 unsigned PReg1 = getPredicatedRegister(MI1, HII);
1024 unsigned PReg2 = getPredicatedRegister(MI2, HII);
1025 return PReg1 == PReg2 &&
1026 Hexagon::PredRegsRegClass.contains(PReg1) &&
1027 Hexagon::PredRegsRegClass.contains(PReg2) &&
1028 getPredicateSense(MI1, HII) != getPredicateSense(MI2, HII) &&
1029 HII->isDotNewInst(MI1) == HII->isDotNewInst(MI2);
1030}
1031
1032// Initialize packetizer flags.
1034 Dependence = false;
1035 PromotedToDotNew = false;
1036 GlueToNewValueJump = false;
1037 GlueAllocframeStore = false;
1038 FoundSequentialDependence = false;
1039 ChangedOffset = INT64_MAX;
1040}
1041
1042// Ignore bundling of pseudo instructions.
1044 const MachineBasicBlock *) {
1045 if (MI.isDebugInstr())
1046 return true;
1047
1048 if (MI.isCFIInstruction())
1049 return false;
1050
1051 // We must print out inline assembly.
1052 if (MI.isInlineAsm())
1053 return false;
1054
1055 if (MI.isImplicitDef())
1056 return false;
1057
1058 // We check if MI has any functional units mapped to it. If it doesn't,
1059 // we ignore the instruction.
1060 const MCInstrDesc& TID = MI.getDesc();
1062 return !IS->getUnits();
1063}
1064
1066 // Ensure any bundles created by gather packetize remain separate.
1067 if (MI.isBundle())
1068 return true;
1069
1070 if (MI.isEHLabel() || MI.isCFIInstruction())
1071 return true;
1072
1073 // Consider inline asm to not be a solo instruction by default.
1074 // Inline asm will be put in a packet temporarily, but then it will be
1075 // removed, and placed outside of the packet (before or after, depending
1076 // on dependencies). This is to reduce the impact of inline asm as a
1077 // "packet splitting" instruction.
1078 if (MI.isInlineAsm() && !ScheduleInlineAsm)
1079 return true;
1080
1081 if (isSchedBarrier(MI))
1082 return true;
1083
1084 if (HII->isSolo(MI))
1085 return true;
1086
1087 if (MI.getOpcode() == Hexagon::PATCHABLE_FUNCTION_ENTER ||
1088 MI.getOpcode() == Hexagon::PATCHABLE_FUNCTION_EXIT ||
1089 MI.getOpcode() == Hexagon::PATCHABLE_TAIL_CALL)
1090 return true;
1091
1092 if (MI.getOpcode() == Hexagon::A2_nop)
1093 return true;
1094
1095 return false;
1096}
1097
1098// Quick check if instructions MI and MJ cannot coexist in the same packet.
1099// Limit the tests to be "one-way", e.g. "if MI->isBranch and MJ->isInlineAsm",
1100// but not the symmetric case: "if MJ->isBranch and MI->isInlineAsm".
1101// For full test call this function twice:
1102// cannotCoexistAsymm(MI, MJ) || cannotCoexistAsymm(MJ, MI)
1103// Doing the test only one way saves the amount of code in this function,
1104// since every test would need to be repeated with the MI and MJ reversed.
//
// Returns true as soon as one of the one-way rules below rejects the pair.
// Returns false when none of them fires; that only means this quick check
// found no objection, not that the pair is known to be legal.
1105static bool cannotCoexistAsymm(const MachineInstr &MI, const MachineInstr &MJ,
1106 const HexagonInstrInfo &HII) {
1107 const MachineFunction *MF = MI.getParent()->getParent();
// NOTE(review): listing line 1108 is absent from this extract, so the first
// half of the condition (presumably the part that uses MF) is not visible.
// Only the HII.isHVXMemWithAIndirect(MI, MJ) operand can be seen here.
1109 HII.isHVXMemWithAIndirect(MI, MJ))
1110 return true;
1111
1112 // Don't allow a store and an instruction that must be in slot0 and
1113 // doesn't allow a slot1 instruction.
1114 if (MI.mayStore() && HII.isRestrictNoSlot1Store(MJ) && HII.isPureSlot0(MJ))
1115 return true;
1116
1117 // An inline asm cannot be together with a branch, because we may not be
1118 // able to remove the asm out after packetizing (i.e. if the asm must be
1119 // moved past the bundle). Similarly, two asms cannot be together to avoid
1120 // complications when determining their relative order outside of a bundle.
// When MI is inline asm the verdict is final: this branch always returns, so
// none of the checks further down are reached for it.
1121 if (MI.isInlineAsm())
1122 return MJ.isInlineAsm() || MJ.isBranch() || MJ.isBarrier() ||
1123 MJ.isCall() || MJ.isTerminator();
1124
1125 // New-value stores cannot coexist with any other stores.
1126 if (HII.isNewValueStore(MI) && MJ.mayStore())
1127 return true;
1128
// Opcode-specific rule: the opcodes listed below restrict which instruction
// types MJ may have; every other opcode falls through to the default case.
1129 switch (MI.getOpcode()) {
1130 case Hexagon::S2_storew_locked:
1131 case Hexagon::S4_stored_locked:
1132 case Hexagon::L2_loadw_locked:
1133 case Hexagon::L4_loadd_locked:
1134 case Hexagon::Y2_dccleana:
1135 case Hexagon::Y2_dccleaninva:
1136 case Hexagon::Y2_dcinva:
1137 case Hexagon::Y2_dczeroa:
1138 case Hexagon::Y4_l2fetch:
1139 case Hexagon::Y5_l2fetch: {
1140 // These instructions can only be grouped with ALU32 or non-floating-point
1141 // XTYPE instructions. Since there is no convenient way of identifying fp
1142 // XTYPE instructions, only allow grouping with ALU32 for now.
1143 unsigned TJ = HII.getType(MJ);
// NOTE(review): listing lines 1145-1146 are absent from this extract, so the
// remaining operands of this condition (presumably the other accepted ALU32
// type codes) are not visible; only the TypeALU32_2op comparison is shown.
1144 if (TJ != HexagonII::TypeALU32_2op &&
1147 return true;
1148 break;
1149 }
1150 default:
1151 break;
1152 }
1153
1154 // "False" really means that the quick check failed to determine if
1155 // MI and MJ cannot coexist.
1156 return false;
1157}
1158
1159// Full, symmetric check.
1161 const MachineInstr &MJ) {
1162 return cannotCoexistAsymm(MI, MJ, *HII) || cannotCoexistAsymm(MJ, MI, *HII);
1163}
1164
1166 for (auto &B : MF) {
1168 for (MachineInstr &MI : llvm::make_early_inc_range(B.instrs())) {
1169 if (MI.isBundle())
1170 BundleIt = MI.getIterator();
1171 if (!MI.isInsideBundle())
1172 continue;
1173
1174 // Decide on where to insert the instruction that we are pulling out.
1175 // Debug instructions always go before the bundle, but the placement of
1176 // INLINE_ASM depends on potential dependencies. By default, try to
1177 // put it before the bundle, but if the asm writes to a register that
1178 // other instructions in the bundle read, then we need to place it
1179 // after the bundle (to preserve the bundle semantics).
1180 bool InsertBeforeBundle;
1181 if (MI.isInlineAsm())
1182 InsertBeforeBundle = !hasWriteToReadDep(MI, *BundleIt, HRI);
1183 else if (MI.isDebugValue())
1184 InsertBeforeBundle = true;
1185 else
1186 continue;
1187
1188 BundleIt = moveInstrOut(MI, BundleIt, InsertBeforeBundle);
1189 }
1190 }
1191}
1192
1193// Check if a given instruction is of class "system".
1194static bool isSystemInstr(const MachineInstr &MI) {
1195 unsigned Opc = MI.getOpcode();
1196 switch (Opc) {
1197 case Hexagon::Y2_barrier:
1198 case Hexagon::Y2_dcfetchbo:
1199 case Hexagon::Y4_l2fetch:
1200 case Hexagon::Y5_l2fetch:
1201 return true;
1202 }
1203 return false;
1204}
1205
1207 const MachineInstr &J) {
1208 // The dependence graph may not include edges between dead definitions,
1209 // so without extra checks, we could end up packetizing two instruction
1210 // defining the same (dead) register.
1211 if (I.isCall() || J.isCall())
1212 return false;
1213 if (HII->isPredicated(I) || HII->isPredicated(J))
1214 return false;
1215
1216 BitVector DeadDefs(Hexagon::NUM_TARGET_REGS);
1217 for (auto &MO : I.operands()) {
1218 if (!MO.isReg() || !MO.isDef() || !MO.isDead())
1219 continue;
1220 DeadDefs[MO.getReg()] = true;
1221 }
1222
1223 for (auto &MO : J.operands()) {
1224 if (!MO.isReg() || !MO.isDef() || !MO.isDead())
1225 continue;
1226 Register R = MO.getReg();
1227 if (R != Hexagon::USR_OVF && DeadDefs[R])
1228 return true;
1229 }
1230 return false;
1231}
1232
1234 const MachineInstr &J) {
1235 // A save callee-save register function call can only be in a packet
1236 // with instructions that don't write to the callee-save registers.
1237 if ((HII->isSaveCalleeSavedRegsCall(I) &&
1238 doesModifyCalleeSavedReg(J, HRI)) ||
1239 (HII->isSaveCalleeSavedRegsCall(J) &&
1241 return true;
1242
1243 // Two control flow instructions cannot go in the same packet.
1244 if (isControlFlow(I) && isControlFlow(J))
1245 return true;
1246
1247 // \ref-manual (7.3.4) A loop setup packet in loopN or spNloop0 cannot
1248 // contain a speculative indirect jump,
1249 // a new-value compare jump or a dealloc_return.
1250 auto isBadForLoopN = [this] (const MachineInstr &MI) -> bool {
1251 if (MI.isCall() || HII->isDeallocRet(MI) || HII->isNewValueJump(MI))
1252 return true;
1253 if (HII->isPredicated(MI) && HII->isPredicatedNew(MI) && HII->isJumpR(MI))
1254 return true;
1255 return false;
1256 };
1257
1258 if (HII->isLoopN(I) && isBadForLoopN(J))
1259 return true;
1260 if (HII->isLoopN(J) && isBadForLoopN(I))
1261 return true;
1262
1263 // dealloc_return cannot appear in the same packet as a conditional or
1264 // unconditional jump.
1265 return HII->isDeallocRet(I) &&
1266 (J.isBranch() || J.isCall() || J.isBarrier());
1267}
1268
1270 const MachineInstr &J) {
1271 // Adding I to a packet that has J.
1272
1273 // Regmasks are not reflected in the scheduling dependency graph, so
1274 // we need to check them manually. This code assumes that regmasks only
1275 // occur on calls, and the problematic case is when we add an instruction
1276 // defining a register R to a packet that has a call that clobbers R via
1277 // a regmask. Those cannot be packetized together, because the call will
1278 // be executed last. That's also a reason why it is ok to add a call
1279 // clobbering R to a packet that defines R.
1280
1281 // Look for regmasks in J.
1282 for (const MachineOperand &OpJ : J.operands()) {
1283 if (!OpJ.isRegMask())
1284 continue;
1285 assert((J.isCall() || HII->isTailCall(J)) && "Regmask on a non-call");
1286 for (const MachineOperand &OpI : I.operands()) {
1287 if (OpI.isReg()) {
1288 if (OpJ.clobbersPhysReg(OpI.getReg()))
1289 return true;
1290 } else if (OpI.isRegMask()) {
1291 // Both are regmasks. Assume that they intersect.
1292 return true;
1293 }
1294 }
1295 }
1296 return false;
1297}
1298
1300 const MachineInstr &J) {
1301 bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J);
1302 bool StoreI = I.mayStore(), StoreJ = J.mayStore();
1303 if ((SysI && StoreJ) || (SysJ && StoreI))
1304 return true;
1305
1306 if (StoreI && StoreJ) {
1307 if (HII->isNewValueInst(J) || HII->isMemOp(J) || HII->isMemOp(I))
1308 return true;
1309 } else {
1310 // A memop cannot be in the same packet with another memop or a store.
1311 // Two stores can be together, but here I and J cannot both be stores.
1312 bool MopStI = HII->isMemOp(I) || StoreI;
1313 bool MopStJ = HII->isMemOp(J) || StoreJ;
1314 if (MopStI && MopStJ)
1315 return true;
1316 }
1317
1318 return (StoreJ && HII->isDeallocRet(I)) || (StoreI && HII->isDeallocRet(J));
1319}
1320
1321// SUI is the current instruction that is outside of the current packet.
1322// SUJ is the current instruction inside the current packet against which
1323// SUI will be packetized.
1325 assert(SUI->getInstr() && SUJ->getInstr());
1326 MachineInstr &I = *SUI->getInstr();
1327 MachineInstr &J = *SUJ->getInstr();
1328
1329 // Clear IgnoreDepMIs when Packet starts.
1330 if (CurrentPacketMIs.size() == 1)
1331 IgnoreDepMIs.clear();
1332
1333 MachineBasicBlock::iterator II = I.getIterator();
1334
1335 // Solo instructions cannot go in the packet.
1336 assert(!isSoloInstruction(I) && "Unexpected solo instr!");
1337
1338 if (cannotCoexist(I, J))
1339 return false;
1340
1342 if (Dependence)
1343 return false;
1344
1345 // Regmasks are not accounted for in the scheduling graph, so we need
1346 // to explicitly check for dependencies caused by them. They should only
1347 // appear on calls, so it's not too pessimistic to reject all regmask
1348 // dependencies.
1350 if (Dependence)
1351 return false;
1352
1353 // Dual-store does not allow second store, if the first store is not
1354 // in SLOT0. New value store, new value jump, dealloc_return and memop
1355 // always take SLOT0. Arch spec 3.4.4.2.
1357 if (Dependence)
1358 return false;
1359
1360 // If an instruction feeds new value jump, glue it.
1361 MachineBasicBlock::iterator NextMII = I.getIterator();
1362 ++NextMII;
1363 if (NextMII != I.getParent()->end() && HII->isNewValueJump(*NextMII)) {
1364 MachineInstr &NextMI = *NextMII;
1365
1366 bool secondRegMatch = false;
1367 const MachineOperand &NOp0 = NextMI.getOperand(0);
1368 const MachineOperand &NOp1 = NextMI.getOperand(1);
1369
1370 if (NOp1.isReg() && I.getOperand(0).getReg() == NOp1.getReg())
1371 secondRegMatch = true;
1372
1373 for (MachineInstr *PI : CurrentPacketMIs) {
1374 // NVJ can not be part of the dual jump - Arch Spec: section 7.8.
1375 if (PI->isCall()) {
1376 Dependence = true;
1377 break;
1378 }
1379 // Validate:
1380 // 1. Packet does not have a store in it.
1381 // 2. If the first operand of the nvj is newified, and the second
1382 // operand is also a reg, it (second reg) is not defined in
1383 // the same packet.
1384 // 3. If the second operand of the nvj is newified, (which means
1385 // first operand is also a reg), first reg is not defined in
1386 // the same packet.
1387 if (PI->getOpcode() == Hexagon::S2_allocframe || PI->mayStore() ||
1388 HII->isLoopN(*PI)) {
1389 Dependence = true;
1390 break;
1391 }
1392 // Check #2/#3.
1393 const MachineOperand &OpR = secondRegMatch ? NOp0 : NOp1;
1394 if (OpR.isReg() && PI->modifiesRegister(OpR.getReg(), HRI)) {
1395 Dependence = true;
1396 break;
1397 }
1398 }
1399
1400 GlueToNewValueJump = true;
1401 if (Dependence)
1402 return false;
1403 }
1404
1405 // There is no dependency between a prolog instruction and its successor.
1406 if (!SUJ->isSucc(SUI))
1407 return true;
1408
1409 for (unsigned i = 0; i < SUJ->Succs.size(); ++i) {
1410 if (FoundSequentialDependence)
1411 break;
1412
1413 if (SUJ->Succs[i].getSUnit() != SUI)
1414 continue;
1415
1416 SDep::Kind DepType = SUJ->Succs[i].getKind();
1417 // For direct calls:
1418 // Ignore register dependences for call instructions for packetization
1419 // purposes except for those due to r31 and predicate registers.
1420 //
1421 // For indirect calls:
1422 // Same as direct calls + check for true dependences to the register
1423 // used in the indirect call.
1424 //
1425 // We completely ignore Order dependences for call instructions.
1426 //
1427 // For returns:
1428 // Ignore register dependences for return instructions like jumpr,
1429 // dealloc return unless we have dependencies on the explicit uses
1430 // of the registers used by jumpr (like r31) or dealloc return
1431 // (like r29 or r30).
1432 unsigned DepReg = 0;
1433 const TargetRegisterClass *RC = nullptr;
1434 if (DepType == SDep::Data) {
1435 DepReg = SUJ->Succs[i].getReg();
1436 RC = HRI->getMinimalPhysRegClass(DepReg);
1437 }
1438
1439 if (I.isCall() || HII->isJumpR(I) || I.isReturn() || HII->isTailCall(I)) {
1440 if (!isRegDependence(DepType))
1441 continue;
1442 if (!isCallDependent(I, DepType, SUJ->Succs[i].getReg()))
1443 continue;
1444 }
1445
1446 if (DepType == SDep::Data) {
1447 if (canPromoteToDotCur(J, SUJ, DepReg, II, RC))
1448 if (promoteToDotCur(J, DepType, II, RC))
1449 continue;
1450 }
1451
1452 // Data dependence is OK if we have load.cur.
1453 if (DepType == SDep::Data && HII->isDotCurInst(J)) {
1454 if (HII->isHVXVec(I))
1455 continue;
1456 }
1457
1458 // For instructions that can be promoted to dot-new, try to promote.
1459 if (DepType == SDep::Data) {
1460 if (canPromoteToDotNew(I, SUJ, DepReg, II, RC)) {
1461 if (promoteToDotNew(I, DepType, II, RC)) {
1462 PromotedToDotNew = true;
1463 if (cannotCoexist(I, J))
1464 FoundSequentialDependence = true;
1465 continue;
1466 }
1467 }
1468 if (HII->isNewValueJump(I))
1469 continue;
1470 }
1471
1472 // For predicated instructions, if the predicates are complements then
1473 // there can be no dependence.
1474 if (HII->isPredicated(I) && HII->isPredicated(J) &&
1476 // Not always safe to do this translation.
1477 // DAG Builder attempts to reduce dependence edges using transitive
1478 // nature of dependencies. Here is an example:
1479 //
1480 // r0 = tfr_pt ... (1)
1481 // r0 = tfr_pf ... (2)
1482 // r0 = tfr_pt ... (3)
1483 //
1484 // There will be an output dependence between (1)->(2) and (2)->(3).
1485 // However, there is no dependence edge between (1)->(3). This results
1486 // in all 3 instructions going in the same packet. We ignore dependence
1487 // only once to avoid this situation.
1488 auto Itr = find(IgnoreDepMIs, &J);
1489 if (Itr != IgnoreDepMIs.end()) {
1490 Dependence = true;
1491 return false;
1492 }
1493 IgnoreDepMIs.push_back(&I);
1494 continue;
1495 }
1496
1497 // Ignore Order dependences between unconditional direct branches
1498 // and non-control-flow instructions.
1499 if (isDirectJump(I) && !J.isBranch() && !J.isCall() &&
1500 DepType == SDep::Order)
1501 continue;
1502
1503 // Ignore all dependences for jumps except for true and output
1504 // dependences.
1505 if (I.isConditionalBranch() && DepType != SDep::Data &&
1506 DepType != SDep::Output)
1507 continue;
1508
1509 if (DepType == SDep::Output) {
1510 FoundSequentialDependence = true;
1511 break;
1512 }
1513
1514 // For Order dependences:
1515 // 1. Volatile loads/stores can be packetized together, unless other
1516 // rules prevent it.
1517 // 2. Store followed by a load is not allowed.
1518 // 3. Store followed by a store is valid.
1519 // 4. Load followed by any memory operation is allowed.
1520 if (DepType == SDep::Order) {
1521 if (!PacketizeVolatiles) {
1522 bool OrdRefs = I.hasOrderedMemoryRef() || J.hasOrderedMemoryRef();
1523 if (OrdRefs) {
1524 FoundSequentialDependence = true;
1525 break;
1526 }
1527 }
1528 // J is first, I is second.
1529 bool LoadJ = J.mayLoad(), StoreJ = J.mayStore();
1530 bool LoadI = I.mayLoad(), StoreI = I.mayStore();
1531 bool NVStoreJ = HII->isNewValueStore(J);
1532 bool NVStoreI = HII->isNewValueStore(I);
1533 bool IsVecJ = HII->isHVXVec(J);
1534 bool IsVecI = HII->isHVXVec(I);
1535
1536 // Don't reorder the loads if there is an order dependence. This would
1537 // occur if the first instruction must go in slot0.
1538 if (LoadJ && LoadI && HII->isPureSlot0(J)) {
1539 FoundSequentialDependence = true;
1540 break;
1541 }
1542
1544 ((LoadJ && StoreI && !NVStoreI) ||
1545 (StoreJ && LoadI && !NVStoreJ)) &&
1546 (J.getOpcode() != Hexagon::S2_allocframe &&
1547 I.getOpcode() != Hexagon::S2_allocframe) &&
1548 (J.getOpcode() != Hexagon::L2_deallocframe &&
1549 I.getOpcode() != Hexagon::L2_deallocframe) &&
1550 (!HII->isMemOp(J) && !HII->isMemOp(I)) && (!IsVecJ && !IsVecI))
1551 setmemShufDisabled(true);
1552 else
1553 if (StoreJ && LoadI && alias(J, I)) {
1554 FoundSequentialDependence = true;
1555 break;
1556 }
1557
1558 if (!StoreJ)
1559 if (!LoadJ || (!LoadI && !StoreI)) {
1560 // If J is neither load nor store, assume a dependency.
1561 // If J is a load, but I is neither, also assume a dependency.
1562 FoundSequentialDependence = true;
1563 break;
1564 }
1565 // Store followed by store: not OK on V2.
1566 // Store followed by load: not OK on all.
1567 // Load followed by store: OK on all.
1568 // Load followed by load: OK on all.
1569 continue;
1570 }
1571
1572 // Special case for ALLOCFRAME: even though there is dependency
1573 // between ALLOCFRAME and subsequent store, allow it to be packetized
1574 // in a same packet. This implies that the store is using the caller's
1575 // SP. Hence, offset needs to be updated accordingly.
1576 if (DepType == SDep::Data && J.getOpcode() == Hexagon::S2_allocframe) {
1577 unsigned Opc = I.getOpcode();
1578 switch (Opc) {
1579 case Hexagon::S2_storerd_io:
1580 case Hexagon::S2_storeri_io:
1581 case Hexagon::S2_storerh_io:
1582 case Hexagon::S2_storerb_io:
1583 if (I.getOperand(0).getReg() == HRI->getStackRegister()) {
1584 // Since this store is to be glued with allocframe in the same
1585 // packet, it will use SP of the previous stack frame, i.e.
1586 // caller's SP. Therefore, we need to recalculate offset
1587 // according to this change.
1588 GlueAllocframeStore = useCallersSP(I);
1589 if (GlueAllocframeStore)
1590 continue;
1591 }
1592 break;
1593 default:
1594 break;
1595 }
1596 }
1597
1598 // There are certain anti-dependencies that cannot be ignored.
1599 // Specifically:
1600 // J2_call ... implicit-def %r0 ; SUJ
1601 // R0 = ... ; SUI
1602 // Those cannot be packetized together, since the call will observe
1603 // the effect of the assignment to R0.
1604 if ((DepType == SDep::Anti || DepType == SDep::Output) && J.isCall()) {
1605 // Check if I defines any volatile register. We should also check
1606 // registers that the call may read, but these happen to be a
1607 // subset of the volatile register set.
1608 for (const MachineOperand &Op : I.operands()) {
1609 if (Op.isReg() && Op.isDef()) {
1610 Register R = Op.getReg();
1611 if (!J.readsRegister(R, HRI) && !J.modifiesRegister(R, HRI))
1612 continue;
1613 } else if (!Op.isRegMask()) {
1614 // If I has a regmask assume dependency.
1615 continue;
1616 }
1617 FoundSequentialDependence = true;
1618 break;
1619 }
1620 }
1621
1622 // Skip over remaining anti-dependences. Two instructions that are
1623 // anti-dependent can share a packet, since in most such cases all
1624 // operands are read before any modifications take place.
1625 // The exceptions are branch and call instructions, since they are
1626 // executed after all other instructions have completed (at least
1627 // conceptually).
1628 if (DepType != SDep::Anti) {
1629 FoundSequentialDependence = true;
1630 break;
1631 }
1632 }
1633
1634 if (FoundSequentialDependence) {
1635 Dependence = true;
1636 return false;
1637 }
1638
1639 return true;
1640}
1641
1643 assert(SUI->getInstr() && SUJ->getInstr());
1644 MachineInstr &I = *SUI->getInstr();
1645 MachineInstr &J = *SUJ->getInstr();
1646
1647 bool Coexist = !cannotCoexist(I, J);
1648
1649 if (Coexist && !Dependence)
1650 return true;
1651
1652 // Check if the instruction was promoted to a dot-new. If so, demote it
1653 // back into a dot-old.
1654 if (PromotedToDotNew)
1656
1657 cleanUpDotCur();
1658 // Check if the instruction (must be a store) was glued with an allocframe
1659 // instruction. If so, restore its offset to its original value, i.e. use
1660 // current SP instead of caller's SP.
1661 if (GlueAllocframeStore) {
1662 useCalleesSP(I);
1663 GlueAllocframeStore = false;
1664 }
1665
1666 if (ChangedOffset != INT64_MAX)
1668
1669 if (GlueToNewValueJump) {
1670 // Putting I and J together would prevent the new-value jump from being
1671 // packetized with the producer. In that case I and J must be separated.
1672 GlueToNewValueJump = false;
1673 return false;
1674 }
1675
1676 if (!Coexist)
1677 return false;
1678
1679 if (ChangedOffset == INT64_MAX && updateOffset(SUI, SUJ)) {
1680 FoundSequentialDependence = false;
1681 Dependence = false;
1682 return true;
1683 }
1684
1685 return false;
1686}
1687
1688
1690 bool FoundLoad = false;
1691 bool FoundStore = false;
1692
1693 for (auto *MJ : CurrentPacketMIs) {
1694 unsigned Opc = MJ->getOpcode();
1695 if (Opc == Hexagon::S2_allocframe || Opc == Hexagon::L2_deallocframe)
1696 continue;
1697 if (HII->isMemOp(*MJ))
1698 continue;
1699 if (MJ->mayLoad())
1700 FoundLoad = true;
1701 if (MJ->mayStore() && !HII->isNewValueStore(*MJ))
1702 FoundStore = true;
1703 }
1704 return FoundLoad && FoundStore;
1705}
1706
1707
1710 MachineBasicBlock::iterator MII = MI.getIterator();
1711 MachineBasicBlock *MBB = MI.getParent();
1712
1713 if (CurrentPacketMIs.empty()) {
1714 PacketStalls = false;
1715 PacketStallCycles = 0;
1716 }
1717 PacketStalls |= producesStall(MI);
1718 PacketStallCycles = std::max(PacketStallCycles, calcStall(MI));
1719
1720 if (MI.isImplicitDef()) {
1721 // Add to the packet to allow subsequent instructions to be checked
1722 // properly.
1723 CurrentPacketMIs.push_back(&MI);
1724 return MII;
1725 }
1727
1728 bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI);
1729 bool Good = true;
1730
1731 if (GlueToNewValueJump) {
1732 MachineInstr &NvjMI = *++MII;
1733 // We need to put both instructions in the same packet: MI and NvjMI.
1734 // Either of them can require a constant extender. Try to add both to
1735 // the current packet, and if that fails, end the packet and start a
1736 // new one.
1738 if (ExtMI)
1740
1741 bool ExtNvjMI = HII->isExtended(NvjMI) || HII->isConstExtended(NvjMI);
1742 if (Good) {
1745 else
1746 Good = false;
1747 }
1748 if (Good && ExtNvjMI)
1750
1751 if (!Good) {
1752 endPacket(MBB, MI);
1755 if (ExtMI) {
1758 }
1761 if (ExtNvjMI) {
1764 }
1765 }
1766 CurrentPacketMIs.push_back(&MI);
1767 CurrentPacketMIs.push_back(&NvjMI);
1768 return MII;
1769 }
1770
1772 if (ExtMI && !tryAllocateResourcesForConstExt(true)) {
1773 endPacket(MBB, MI);
1774 if (PromotedToDotNew)
1776 if (GlueAllocframeStore) {
1778 GlueAllocframeStore = false;
1779 }
1782 }
1783
1784 CurrentPacketMIs.push_back(&MI);
1785 return MII;
1786}
1787
1790 // Replace VLIWPacketizerList::endPacket(MBB, EndMI).
1791 LLVM_DEBUG({
1792 if (!CurrentPacketMIs.empty()) {
1793 dbgs() << "Finalizing packet:\n";
1794 unsigned Idx = 0;
1795 for (MachineInstr *MI : CurrentPacketMIs) {
1796 unsigned R = ResourceTracker->getUsedResources(Idx++);
1797 dbgs() << " * [res:0x" << utohexstr(R) << "] " << *MI;
1798 }
1799 }
1800 });
1801
1802 bool memShufDisabled = getmemShufDisabled();
1803 if (memShufDisabled && !foundLSInPacket()) {
1804 setmemShufDisabled(false);
1805 LLVM_DEBUG(dbgs() << " Not added to NoShufPacket\n");
1806 }
1807 memShufDisabled = getmemShufDisabled();
1808
1809 OldPacketMIs.clear();
1810 for (MachineInstr *MI : CurrentPacketMIs) {
1811 MachineBasicBlock::instr_iterator NextMI = std::next(MI->getIterator());
1812 for (auto &I : make_range(HII->expandVGatherPseudo(*MI), NextMI))
1813 OldPacketMIs.push_back(&I);
1814 }
1815 CurrentPacketMIs.clear();
1816
1817 if (OldPacketMIs.size() > 1) {
1818 MachineBasicBlock::instr_iterator FirstMI(OldPacketMIs.front());
1819 MachineBasicBlock::instr_iterator LastMI(EndMI.getInstrIterator());
1820 finalizeBundle(*MBB, FirstMI, LastMI);
1821 auto BundleMII = std::prev(FirstMI);
1822 if (memShufDisabled)
1823 HII->setBundleNoShuf(BundleMII);
1824
1825 setmemShufDisabled(false);
1826 }
1827
1828 PacketHasDuplex = false;
1829 PacketHasSLOT0OnlyInsn = false;
1830 ResourceTracker->clearResources();
1831 LLVM_DEBUG(dbgs() << "End packet\n");
1832}
1833
1835 if (Minimal)
1836 return false;
1837
1838 if (producesStall(MI))
1839 return false;
1840
1841 // If TinyCore with Duplexes is enabled, check if this MI can form a Duplex
1842 // with any other instruction in the existing packet.
1843 auto &HST = MI.getParent()->getParent()->getSubtarget<HexagonSubtarget>();
1844 // Constraint 1: Only one duplex allowed per packet.
1845 // Constraint 2: Consider duplex checks only if there is at least one
1846 // instruction in a packet.
1847 // Constraint 3: If one of the existing instructions in the packet has a
1848 // SLOT0 only instruction that can not be duplexed, do not attempt to form
1849 // duplexes. (TODO: This will invalidate the L4_return* instructions to form a
1850 // duplex)
1851 if (HST.isTinyCoreWithDuplex() && CurrentPacketMIs.size() > 0 &&
1852 !PacketHasDuplex) {
1853 // Check for SLOT0 only non-duplexable instruction in packet.
1854 for (auto &MJ : CurrentPacketMIs)
1855 PacketHasSLOT0OnlyInsn |= HII->isPureSlot0(*MJ);
1856 // Get the Big Core Opcode (dup_*).
1857 int Opcode = HII->getDuplexOpcode(MI, false);
1858 if (Opcode >= 0) {
1859 // We now have an instruction that can be duplexed.
1860 for (auto &MJ : CurrentPacketMIs) {
1861 if (HII->isDuplexPair(MI, *MJ) && !PacketHasSLOT0OnlyInsn) {
1862 PacketHasDuplex = true;
1863 return true;
1864 }
1865 }
1866 // If it can not be duplexed, check if there is a valid transition in DFA
1867 // with the original opcode.
1868 MachineInstr &MIRef = const_cast<MachineInstr &>(MI);
1869 MIRef.setDesc(HII->get(Opcode));
1870 return ResourceTracker->canReserveResources(MIRef);
1871 }
1872 }
1873
1874 return true;
1875}
1876
1877// V60 forward scheduling.
1879 // Check whether the previous packet is in a different loop. If this is the
1880 // case, there is little point in trying to avoid a stall because that would
1881 // favor the rare case (loop entry) over the common case (loop iteration).
1882 //
1883 // TODO: We should really be able to check all the incoming edges if this is
1884 // the first packet in a basic block, so we can avoid stalls from the loop
1885 // backedge.
1886 if (!OldPacketMIs.empty()) {
1887 auto *OldBB = OldPacketMIs.front()->getParent();
1888 auto *ThisBB = I.getParent();
1889 if (MLI->getLoopFor(OldBB) != MLI->getLoopFor(ThisBB))
1890 return 0;
1891 }
1892
1893 SUnit *SUI = MIToSUnit[const_cast<MachineInstr *>(&I)];
1894 if (!SUI)
1895 return 0;
1896
1897 // If the latency is 0 and there is a data dependence between this
1898 // instruction and any instruction in the current packet, we disregard any
1899 // potential stalls due to the instructions in the previous packet. Most of
1900 // the instruction pairs that can go together in the same packet have 0
1901 // latency between them. The exceptions are
1902 // 1. NewValueJumps as they're generated much later and the latencies can't
1903 // be changed at that point.
1904 // 2. .cur instructions, if its consumer has a 0 latency successor (such as
1905 // .new). In this case, the latency between .cur and the consumer stays
1906 // non-zero even though we can have both .cur and .new in the same packet.
1907 // Changing the latency to 0 is not an option as it causes software pipeliner
1908 // to not pipeline in some cases.
1909
1910 // For Example:
1911 // {
1912 // I1: v6.cur = vmem(r0++#1)
1913 // I2: v7 = valign(v6,v4,r2)
1914 // I3: vmem(r5++#1) = v7.new
1915 // }
1916 // Here I2 and I3 have 0 cycle latency, but I1 and I2 have 2.
1917
1918 for (auto *J : CurrentPacketMIs) {
1919 SUnit *SUJ = MIToSUnit[J];
1920 for (auto &Pred : SUI->Preds)
1921 if (Pred.getSUnit() == SUJ)
1922 if ((Pred.getLatency() == 0 && Pred.isAssignedRegDep()) ||
1923 HII->isNewValueJump(I) || HII->isToBeScheduledASAP(*J, I))
1924 return 0;
1925 }
1926
1927 // Check if the latency is greater than one between this instruction and any
1928 // instruction in the previous packet.
1929 for (auto *J : OldPacketMIs) {
1930 SUnit *SUJ = MIToSUnit[J];
1931 for (auto &Pred : SUI->Preds)
1932 if (Pred.getSUnit() == SUJ && Pred.getLatency() > 1)
1933 return Pred.getLatency();
1934 }
1935
1936 return 0;
1937}
1938
1940 unsigned int Latency = calcStall(I);
1941 if (Latency == 0)
1942 return false;
1943 // Ignore stall unless it stalls more than previous instruction in packet
1944 if (PacketStalls)
1945 return Latency > PacketStallCycles;
1946 return true;
1947}
1948
1949//===----------------------------------------------------------------------===//
1950// Public Constructor Functions
1951//===----------------------------------------------------------------------===//
1952
1954 return new HexagonPacketizer(Minimal);
1955}
aarch64 promote const
MachineBasicBlock & MBB
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseSet and SmallDenseSet classes.
uint64_t Size
bool End
Definition: ELF_riscv.cpp:478
cl::opt< bool > ScheduleInlineAsm("hexagon-sched-inline-asm", cl::Hidden, cl::init(false), cl::desc("Do not consider inline-asm a scheduling/" "packetization boundary."))
#define HEXAGON_LRFP_SIZE
static bool cannotCoexistAsymm(const MachineInstr &MI, const MachineInstr &MJ, const HexagonInstrInfo &HII)
static bool isDirectJump(const MachineInstr &MI)
static MachineBasicBlock::iterator moveInstrOut(MachineInstr &MI, MachineBasicBlock::iterator BundleIt, bool Before)
static bool isRegDependence(const SDep::Kind DepType)
static const MachineOperand & getStoreValueOperand(const MachineInstr &MI)
static cl::opt< bool > EnableGenAllInsnClass("enable-gen-insn", cl::Hidden, cl::desc("Generate all instruction with TC"))
static bool isControlFlow(const MachineInstr &MI)
hexagon Hexagon Packetizer
static cl::opt< bool > DisableVecDblNVStores("disable-vecdbl-nv-stores", cl::Hidden, cl::desc("Disable vector double new-value-stores"))
static PredicateKind getPredicateSense(const MachineInstr &MI, const HexagonInstrInfo *HII)
Returns true if an instruction is predicated on p0 and false if it's predicated on !p0.
static unsigned getPredicatedRegister(MachineInstr &MI, const HexagonInstrInfo *QII)
Gets the predicate register of a predicated instruction.
static cl::opt< bool > DisablePacketizer("disable-packetizer", cl::Hidden, cl::desc("Disable Hexagon packetizer pass"))
hexagon packetizer
static cl::opt< bool > Slot1Store("slot1-store-slot0-load", cl::Hidden, cl::init(true), cl::desc("Allow slot1 store and slot0 load"))
static cl::opt< bool > PacketizeVolatiles("hexagon-packetize-volatiles", cl::Hidden, cl::init(true), cl::desc("Allow non-solo packetization of volatile memory references"))
cl::opt< bool > ScheduleInlineAsm
static bool hasWriteToReadDep(const MachineInstr &FirstI, const MachineInstr &SecondI, const TargetRegisterInfo *TRI)
static bool doesModifyCalleeSavedReg(const MachineInstr &MI, const TargetRegisterInfo *TRI)
Returns true if the instruction modifies a callee-saved register.
static bool isLoadAbsSet(const MachineInstr &MI)
static const MachineOperand & getAbsSetOperand(const MachineInstr &MI)
static const MachineOperand & getPostIncrementOperand(const MachineInstr &MI, const HexagonInstrInfo *HII)
static bool isImplicitDependency(const MachineInstr &I, bool CheckDef, unsigned DepReg)
static bool isSchedBarrier(const MachineInstr &MI)
static bool isSystemInstr(const MachineInstr &MI)
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file contains some functions that are useful when dealing with strings.
static constexpr uint32_t Opcode
Definition: aarch32.h:200
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
const InstrItineraryData * getInstrItins() const
bool canReserveResources(const MCInstrDesc *MID)
void reserveResources(const MCInstrDesc *MID)
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Dependence - This class represents a dependence between two memory references in a function.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
bool isPredicated(const MachineInstr &MI) const override
Returns true if the instruction is already predicated.
bool isHVXMemWithAIndirect(const MachineInstr &I, const MachineInstr &J) const
bool isRestrictNoSlot1Store(const MachineInstr &MI) const
bool isPureSlot0(const MachineInstr &MI) const
bool isPostIncrement(const MachineInstr &MI) const override
Return true for post-incremented instructions.
uint64_t getType(const MachineInstr &MI) const
bool isPredicatedTrue(const MachineInstr &MI) const
bool isNewValueStore(const MachineInstr &MI) const
bool arePredicatesComplements(MachineInstr &MI1, MachineInstr &MI2)
bool updateOffset(SUnit *SUI, SUnit *SUJ)
Return true if we can update the offset in MI so that MI and MJ can be packetized together.
void endPacket(MachineBasicBlock *MBB, MachineBasicBlock::iterator MI) override
bool isCallDependent(const MachineInstr &MI, SDep::Kind DepType, unsigned DepReg)
bool promoteToDotCur(MachineInstr &MI, SDep::Kind DepType, MachineBasicBlock::iterator &MII, const TargetRegisterClass *RC)
bool promoteToDotNew(MachineInstr &MI, SDep::Kind DepType, MachineBasicBlock::iterator &MII, const TargetRegisterClass *RC)
bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override
bool canPromoteToDotCur(const MachineInstr &MI, const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII, const TargetRegisterClass *RC)
void useCalleesSP(MachineInstr &MI)
bool demoteToDotOld(MachineInstr &MI)
bool cannotCoexist(const MachineInstr &MI, const MachineInstr &MJ)
const MachineLoopInfo * MLI
bool isSoloInstruction(const MachineInstr &MI) override
bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override
bool hasControlDependence(const MachineInstr &I, const MachineInstr &J)
bool restrictingDepExistInPacket(MachineInstr &, unsigned)
bool producesStall(const MachineInstr &MI)
void undoChangedOffset(MachineInstr &MI)
Undo the changed offset.
bool hasDualStoreDependence(const MachineInstr &I, const MachineInstr &J)
unsigned int calcStall(const MachineInstr &MI)
bool canPromoteToDotNew(const MachineInstr &MI, const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII, const TargetRegisterClass *RC)
bool canPromoteToNewValue(const MachineInstr &MI, const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII)
bool ignorePseudoInstruction(const MachineInstr &MI, const MachineBasicBlock *MBB) override
void unpacketizeSoloInstrs(MachineFunction &MF)
const MachineBranchProbabilityInfo * MBPI
A handle to the branch probability pass.
bool shouldAddToPacket(const MachineInstr &MI) override
bool useCallersSP(MachineInstr &MI)
bool canPromoteToNewValueStore(const MachineInstr &MI, const MachineInstr &PacketMI, unsigned DepReg)
bool tryAllocateResourcesForConstExt(bool Reserve)
MachineBasicBlock::iterator addToPacket(MachineInstr &MI) override
bool hasDeadDependence(const MachineInstr &I, const MachineInstr &J)
bool isNewifiable(const MachineInstr &MI, const TargetRegisterClass *NewRC)
bool hasRegMaskDependence(const MachineInstr &I, const MachineInstr &J)
Register getFrameRegister(const MachineFunction &MF) const override
const HexagonInstrInfo * getInstrInfo() const override
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:600
Instructions::iterator instr_iterator
Instructions::const_iterator const_instr_iterator
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformation or analysis.
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, DebugLoc DL, bool NoImplicit=false)
CreateMachineInstr - Allocate a new MachineInstr.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void deleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
Representation of each machine instruction.
Definition: MachineInstr.h:68
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:543
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction is part of the terminator for a basic block.
Definition: MachineInstr.h:939
bool isImplicitDef() const
bool isBarrier(QueryType Type=AnyInBundle) const
Returns true if the specified instruction stops control flow from executing the instruction immediately following it.
Definition: MachineInstr.h:930
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:915
bool isInlineAsm() const
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
bool isBranch(QueryType Type=AnyInBundle) const
Returns true if this is a conditional, unconditional, or indirect branch.
Definition: MachineInstr.h:947
void unbundleFromPred()
Break bundle above this instruction.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:540
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:659
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the information describing the memory reference is not available.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
bool isBundledWithSucc() const
Return true if this instruction is part of a bundle, and it is not the last instruction in the bundle.
Definition: MachineInstr.h:455
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:553
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
PassRegistry - This class manages the registration and initialization of the pass subsystem at application startup, and assists the PassManager in resolving pass dependencies.
Definition: PassRegistry.h:37
void dump() const
Definition: Pass.cpp:136
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:52
@ Output
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:55
@ Order
Any other ordering dependency.
Definition: ScheduleDAG.h:56
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
bool isSucc(const SUnit *N) const
Tests if node N is a successor of this node.
Definition: ScheduleDAG.h:439
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
MachineFunction & MF
bool alias(const MachineInstr &MI1, const MachineInstr &MI2, bool UseTBAA=true) const
std::vector< MachineInstr * > CurrentPacketMIs
std::map< MachineInstr *, SUnit * > MIToSUnit
DFAPacketizer * ResourceTracker
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition: STLExtras.h:665
void initializeHexagonPacketizerPass(PassRegistry &)
FunctionPass * createHexagonPacketizer(bool Minimal)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
#define OP(n)
Definition: regex2.h:73
FuncUnits getUnits() const
Returns the choice of FUs.