54#define DEBUG_TYPE "x86-avoid-sfb"
61 "x86-sfb-inspection-limit",
62 cl::desc(
"X86: Number of instructions backward to "
63 "inspect for store forwarding blocks."),
68using DisplacementSizeMap = std::map<int64_t, unsigned>;
70class X86AvoidSFBImpl {
73 bool runOnMachineFunction(MachineFunction &MF);
76 MachineRegisterInfo *MRI =
nullptr;
77 const X86InstrInfo *TII =
nullptr;
78 const X86RegisterInfo *TRI =
nullptr;
80 BlockedLoadsStoresPairs;
81 SmallVector<MachineInstr *, 2> ForRemoval;
86 void findPotentiallylBlockedCopies(MachineFunction &MF);
90 void breakBlockedCopies(MachineInstr *LoadInst, MachineInstr *StoreInst,
91 const DisplacementSizeMap &BlockingStoresDispSizeMap);
93 void buildCopies(
int Size, MachineInstr *LoadInst, int64_t LdDispImm,
94 MachineInstr *StoreInst, int64_t StDispImm, int64_t
Offset);
96 void buildCopy(MachineInstr *LoadInst,
unsigned NLoadOpcode, int64_t LoadDisp,
97 MachineInstr *StoreInst,
unsigned NStoreOpcode,
98 int64_t StoreDisp,
unsigned Size, int64_t
Offset);
100 bool alias(
const MachineMemOperand &Op1,
const MachineMemOperand &Op2)
const;
102 unsigned getRegSizeInBytes(MachineInstr *Inst);
108 X86AvoidSFBLegacy() : MachineFunctionPass(ID) {}
110 StringRef getPassName()
const override {
111 return "X86 Avoid Store Forwarding Blocks";
114 bool runOnMachineFunction(MachineFunction &MF)
override;
116 void getAnalysisUsage(AnalysisUsage &AU)
const override {
124char X86AvoidSFBLegacy::ID = 0;
133 return new X86AvoidSFBLegacy();
137 return Opcode == X86::MOVUPSrm || Opcode == X86::MOVAPSrm ||
138 Opcode == X86::VMOVUPSrm || Opcode == X86::VMOVAPSrm ||
139 Opcode == X86::VMOVUPDrm || Opcode == X86::VMOVAPDrm ||
140 Opcode == X86::VMOVDQUrm || Opcode == X86::VMOVDQArm ||
141 Opcode == X86::VMOVUPSZ128rm || Opcode == X86::VMOVAPSZ128rm ||
142 Opcode == X86::VMOVUPDZ128rm || Opcode == X86::VMOVAPDZ128rm ||
143 Opcode == X86::VMOVDQU64Z128rm || Opcode == X86::VMOVDQA64Z128rm ||
144 Opcode == X86::VMOVDQU32Z128rm || Opcode == X86::VMOVDQA32Z128rm;
147 return Opcode == X86::VMOVUPSYrm || Opcode == X86::VMOVAPSYrm ||
148 Opcode == X86::VMOVUPDYrm || Opcode == X86::VMOVAPDYrm ||
149 Opcode == X86::VMOVDQUYrm || Opcode == X86::VMOVDQAYrm ||
150 Opcode == X86::VMOVUPSZ256rm || Opcode == X86::VMOVAPSZ256rm ||
151 Opcode == X86::VMOVUPDZ256rm || Opcode == X86::VMOVAPDZ256rm ||
152 Opcode == X86::VMOVDQU64Z256rm || Opcode == X86::VMOVDQA64Z256rm ||
153 Opcode == X86::VMOVDQU32Z256rm || Opcode == X86::VMOVDQA32Z256rm;
164 return StOpcode == X86::MOVUPSmr || StOpcode == X86::MOVAPSmr;
167 return StOpcode == X86::VMOVUPSmr || StOpcode == X86::VMOVAPSmr;
170 return StOpcode == X86::VMOVUPDmr || StOpcode == X86::VMOVAPDmr;
173 return StOpcode == X86::VMOVDQUmr || StOpcode == X86::VMOVDQAmr;
174 case X86::VMOVUPSZ128rm:
175 case X86::VMOVAPSZ128rm:
176 return StOpcode == X86::VMOVUPSZ128mr || StOpcode == X86::VMOVAPSZ128mr;
177 case X86::VMOVUPDZ128rm:
178 case X86::VMOVAPDZ128rm:
179 return StOpcode == X86::VMOVUPDZ128mr || StOpcode == X86::VMOVAPDZ128mr;
180 case X86::VMOVUPSYrm:
181 case X86::VMOVAPSYrm:
182 return StOpcode == X86::VMOVUPSYmr || StOpcode == X86::VMOVAPSYmr;
183 case X86::VMOVUPDYrm:
184 case X86::VMOVAPDYrm:
185 return StOpcode == X86::VMOVUPDYmr || StOpcode == X86::VMOVAPDYmr;
186 case X86::VMOVDQUYrm:
187 case X86::VMOVDQAYrm:
188 return StOpcode == X86::VMOVDQUYmr || StOpcode == X86::VMOVDQAYmr;
189 case X86::VMOVUPSZ256rm:
190 case X86::VMOVAPSZ256rm:
191 return StOpcode == X86::VMOVUPSZ256mr || StOpcode == X86::VMOVAPSZ256mr;
192 case X86::VMOVUPDZ256rm:
193 case X86::VMOVAPDZ256rm:
194 return StOpcode == X86::VMOVUPDZ256mr || StOpcode == X86::VMOVAPDZ256mr;
195 case X86::VMOVDQU64Z128rm:
196 case X86::VMOVDQA64Z128rm:
197 return StOpcode == X86::VMOVDQU64Z128mr || StOpcode == X86::VMOVDQA64Z128mr;
198 case X86::VMOVDQU32Z128rm:
199 case X86::VMOVDQA32Z128rm:
200 return StOpcode == X86::VMOVDQU32Z128mr || StOpcode == X86::VMOVDQA32Z128mr;
201 case X86::VMOVDQU64Z256rm:
202 case X86::VMOVDQA64Z256rm:
203 return StOpcode == X86::VMOVDQU64Z256mr || StOpcode == X86::VMOVDQA64Z256mr;
204 case X86::VMOVDQU32Z256rm:
205 case X86::VMOVDQA32Z256rm:
206 return StOpcode == X86::VMOVDQU32Z256mr || StOpcode == X86::VMOVDQA32Z256mr;
214 PBlock |= Opcode == X86::MOV64mr || Opcode == X86::MOV64mi32 ||
215 Opcode == X86::MOV32mr || Opcode == X86::MOV32mi ||
216 Opcode == X86::MOV16mr || Opcode == X86::MOV16mi ||
217 Opcode == X86::MOV8mr || Opcode == X86::MOV8mi;
219 PBlock |= Opcode == X86::VMOVUPSmr || Opcode == X86::VMOVAPSmr ||
220 Opcode == X86::VMOVUPDmr || Opcode == X86::VMOVAPDmr ||
221 Opcode == X86::VMOVDQUmr || Opcode == X86::VMOVDQAmr ||
222 Opcode == X86::VMOVUPSZ128mr || Opcode == X86::VMOVAPSZ128mr ||
223 Opcode == X86::VMOVUPDZ128mr || Opcode == X86::VMOVAPDZ128mr ||
224 Opcode == X86::VMOVDQU64Z128mr ||
225 Opcode == X86::VMOVDQA64Z128mr ||
226 Opcode == X86::VMOVDQU32Z128mr || Opcode == X86::VMOVDQA32Z128mr;
237 switch (LoadOpcode) {
238 case X86::VMOVUPSYrm:
239 case X86::VMOVAPSYrm:
240 return X86::VMOVUPSrm;
241 case X86::VMOVUPDYrm:
242 case X86::VMOVAPDYrm:
243 return X86::VMOVUPDrm;
244 case X86::VMOVDQUYrm:
245 case X86::VMOVDQAYrm:
246 return X86::VMOVDQUrm;
247 case X86::VMOVUPSZ256rm:
248 case X86::VMOVAPSZ256rm:
249 return X86::VMOVUPSZ128rm;
250 case X86::VMOVUPDZ256rm:
251 case X86::VMOVAPDZ256rm:
252 return X86::VMOVUPDZ128rm;
253 case X86::VMOVDQU64Z256rm:
254 case X86::VMOVDQA64Z256rm:
255 return X86::VMOVDQU64Z128rm;
256 case X86::VMOVDQU32Z256rm:
257 case X86::VMOVDQA32Z256rm:
258 return X86::VMOVDQU32Z128rm;
266 switch (StoreOpcode) {
267 case X86::VMOVUPSYmr:
268 case X86::VMOVAPSYmr:
269 return X86::VMOVUPSmr;
270 case X86::VMOVUPDYmr:
271 case X86::VMOVAPDYmr:
272 return X86::VMOVUPDmr;
273 case X86::VMOVDQUYmr:
274 case X86::VMOVDQAYmr:
275 return X86::VMOVDQUmr;
276 case X86::VMOVUPSZ256mr:
277 case X86::VMOVAPSZ256mr:
278 return X86::VMOVUPSZ128mr;
279 case X86::VMOVUPDZ256mr:
280 case X86::VMOVAPDZ256mr:
281 return X86::VMOVUPDZ128mr;
282 case X86::VMOVDQU64Z256mr:
283 case X86::VMOVDQA64Z256mr:
284 return X86::VMOVDQU64Z128mr;
285 case X86::VMOVDQU32Z256mr:
286 case X86::VMOVDQA32Z256mr:
287 return X86::VMOVDQU32Z128mr;
297 assert(AddrOffset != -1 &&
"Expected Memory Operand");
323 if (!((
Base.isReg() &&
Base.getReg() != X86::NoRegister) ||
Base.isFI()))
329 if (!(Index.isReg() && Index.getReg() == X86::NoRegister))
331 if (!(Segment.
isReg() && Segment.
getReg() == X86::NoRegister))
344 unsigned BlockCount = 0;
348 PBInst !=
E; ++PBInst) {
349 if (PBInst->isMetaInstruction())
352 if (BlockCount >= InspectionLimit)
355 if (
MI.getDesc().isCall())
356 return PotentialBlockers;
363 if (BlockCount < InspectionLimit) {
365 int LimitLeft = InspectionLimit - BlockCount;
369 if (PBInst.isMetaInstruction())
372 if (PredCount >= LimitLeft)
374 if (PBInst.getDesc().isCall())
380 return PotentialBlockers;
385 unsigned NStoreOpcode, int64_t StoreDisp,
395 MachineInstr *NewLoad =
405 if (LoadBase.
isReg())
410 MachineInstr *StInst = StoreInst;
413 if (PrevInstrIt.getNodePtr() == LoadInst)
415 MachineInstr *NewStore =
425 if (StoreBase.
isReg())
428 assert(StoreSrcVReg.
isReg() &&
"Expected virtual register");
433void X86AvoidSFBImpl::buildCopies(
int Size, MachineInstr *LoadInst,
434 int64_t LdDispImm, MachineInstr *StoreInst,
435 int64_t StDispImm, int64_t
Offset) {
436 int LdDisp = LdDispImm;
437 int StDisp = StDispImm;
451 buildCopy(LoadInst, X86::MOV64rm, LdDisp, StoreInst, X86::MOV64mr, StDisp,
460 buildCopy(LoadInst, X86::MOV32rm, LdDisp, StoreInst, X86::MOV32mr, StDisp,
469 buildCopy(LoadInst, X86::MOV16rm, LdDisp, StoreInst, X86::MOV16mr, StDisp,
478 buildCopy(LoadInst, X86::MOV8rm, LdDisp, StoreInst, X86::MOV8mr, StDisp,
492 auto *StorePrevNonDbgInstr =
496 if (LoadBase.
isReg()) {
502 if (StorePrevNonDbgInstr ==
LoadInst)
506 if (StoreBase.
isReg()) {
508 if (StorePrevNonDbgInstr ==
LoadInst)
514bool X86AvoidSFBImpl::alias(
const MachineMemOperand &Op1,
515 const MachineMemOperand &Op2)
const {
528void X86AvoidSFBImpl::findPotentiallylBlockedCopies(MachineFunction &MF) {
530 for (
auto &
MI :
MBB) {
536 for (MachineOperand &StoreMO :
538 MachineInstr &StoreMI = *StoreMO.getParent();
546 const MachineMemOperand *LMMO = *
MI.memoperands_begin();
551 if (!alias(*LMMO, *SMMO))
552 BlockedLoadsStoresPairs.push_back(std::make_pair(&
MI, &StoreMI));
558unsigned X86AvoidSFBImpl::getRegSizeInBytes(MachineInstr *LoadInst) {
559 const auto *TRC =
TII->getRegClass(
TII->get(LoadInst->
getOpcode()), 0);
560 return TRI->getRegSizeInBits(*TRC) / 8;
563void X86AvoidSFBImpl::breakBlockedCopies(
564 MachineInstr *LoadInst, MachineInstr *StoreInst,
565 const DisplacementSizeMap &BlockingStoresDispSizeMap) {
570 int64_t LdDisp1 = LdDispImm;
572 int64_t StDisp1 = StDispImm;
576 int64_t LdStDelta = StDispImm - LdDispImm;
578 for (
auto DispSizePair : BlockingStoresDispSizeMap) {
579 LdDisp2 = DispSizePair.first;
580 StDisp2 = DispSizePair.first + LdStDelta;
581 Size2 = DispSizePair.second;
583 if (LdDisp2 < LdDisp1) {
584 int OverlapDelta = LdDisp1 - LdDisp2;
585 LdDisp2 += OverlapDelta;
586 StDisp2 += OverlapDelta;
587 Size2 -= OverlapDelta;
589 Size1 = LdDisp2 - LdDisp1;
593 buildCopies(Size1, LoadInst, LdDisp1, StoreInst, StDisp1,
Offset);
595 buildCopies(Size2, LoadInst, LdDisp2, StoreInst, StDisp2,
Offset + Size1);
596 LdDisp1 = LdDisp2 + Size2;
597 StDisp1 = StDisp2 + Size2;
600 unsigned Size3 = (LdDispImm + getRegSizeInBytes(LoadInst)) - LdDisp1;
601 buildCopies(Size3, LoadInst, LdDisp1, StoreInst, StDisp1,
Offset);
610 if (LoadBase.
isReg())
616 int64_t StoreDispImm,
unsigned StoreSize) {
617 return ((StoreDispImm >= LoadDispImm) &&
618 (StoreDispImm <= LoadDispImm + (LoadSize - StoreSize)));
624 int64_t DispImm,
unsigned Size) {
625 auto [It, Inserted] = BlockingStoresDispSizeMap.try_emplace(DispImm,
Size);
627 if (!Inserted && It->second >
Size)
634 if (BlockingStoresDispSizeMap.size() <= 1)
638 for (
auto DispSizePair : BlockingStoresDispSizeMap) {
639 int64_t CurrDisp = DispSizePair.first;
640 unsigned CurrSize = DispSizePair.second;
641 while (DispSizeStack.
size()) {
642 int64_t PrevDisp = DispSizeStack.
back().first;
643 unsigned PrevSize = DispSizeStack.
back().second;
644 if (CurrDisp + CurrSize > PrevDisp + PrevSize)
650 BlockingStoresDispSizeMap.
clear();
651 for (
auto Disp : DispSizeStack)
652 BlockingStoresDispSizeMap.insert(Disp);
655bool X86AvoidSFBImpl::runOnMachineFunction(MachineFunction &MF) {
663 assert(MRI->
isSSA() &&
"Expected MIR to be in SSA form");
668 findPotentiallylBlockedCopies(MF);
670 for (
auto LoadStoreInstPair : BlockedLoadsStoresPairs) {
671 MachineInstr *LoadInst = LoadStoreInstPair.first;
673 DisplacementSizeMap BlockingStoresDispSizeMap;
675 SmallVector<MachineInstr *, 2> PotentialBlockers =
677 for (
auto *PBInst : PotentialBlockers) {
683 unsigned PBstSize = (*PBInst->memoperands_begin())->getSize().getValue();
695 if (BlockingStoresDispSizeMap.empty())
701 MachineInstr *StoreInst = LoadStoreInstPair.second;
707 breakBlockedCopies(LoadInst, StoreInst, BlockingStoresDispSizeMap);
712 for (
auto *RemovedInst : ForRemoval) {
713 RemovedInst->eraseFromParent();
716 BlockedLoadsStoresPairs.clear();
722bool X86AvoidSFBLegacy::runOnMachineFunction(MachineFunction &MF) {
725 AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
726 X86AvoidSFBImpl Impl(AA);
727 return Impl.runOnMachineFunction(MF);
737 X86AvoidSFBImpl Impl(
AA);
738 bool Changed = Impl.runOnMachineFunction(MF);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
Promote Memory to Register
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static unsigned getYMMtoXMMLoadOpcode(unsigned LoadOpcode)
static bool isPotentialBlockedMemCpyLd(unsigned Opcode)
static bool isPotentialBlockedMemCpyPair(unsigned LdOpcode, unsigned StOpcode)
static bool isPotentialBlockingStoreInst(unsigned Opcode, unsigned LoadOpcode)
static bool isXMMLoadOpcode(unsigned Opcode)
static int getAddrOffset(const MachineInstr *MI)
static cl::opt< unsigned > X86AvoidSFBInspectionLimit("x86-sfb-inspection-limit", cl::desc("X86: Number of instructions backward to " "inspect for store forwarding blocks."), cl::init(20), cl::Hidden)
static bool isBlockingStore(int64_t LoadDispImm, unsigned LoadSize, int64_t StoreDispImm, unsigned StoreSize)
static bool isRelevantAddressingMode(MachineInstr *MI)
static cl::opt< bool > DisableX86AvoidStoreForwardBlocks("x86-disable-avoid-SFB", cl::Hidden, cl::desc("X86: Disable Store Forwarding Blocks fixup."), cl::init(false))
static void removeRedundantBlockingStores(DisplacementSizeMap &BlockingStoresDispSizeMap)
static bool hasSameBaseOpValue(MachineInstr *LoadInst, MachineInstr *StoreInst)
static void updateBlockingStoresDispSizeMap(DisplacementSizeMap &BlockingStoresDispSizeMap, int64_t DispImm, unsigned Size)
static MachineOperand & getBaseOperand(MachineInstr *MI)
static unsigned getYMMtoXMMStoreOpcode(unsigned StoreOpcode)
static SmallVector< MachineInstr *, 2 > findPotentialBlockers(MachineInstr *LoadInst)
static void updateKillStatus(MachineInstr *LoadInst, MachineInstr *StoreInst)
static MachineOperand & getDispOperand(MachineInstr *MI)
static bool isYMMLoadOpcode(unsigned Opcode)
static const int MOV128SZ
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are no-alias.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
FunctionPass class - This class is used to implement most global optimizations.
An instruction for reading from memory.
TypeSize getValue() const
Describe properties that are true of each instruction in the target description file.
instr_iterator instr_begin()
Instructions::iterator instr_iterator
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
LocationSize getSize() const
Return the size in bytes of the memory reference.
bool isAtomic() const
Returns true if this operation has an atomic ordering requirement of unordered or higher,...
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
const Value * getValue() const
Return the base address of the memory access.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
int getMemoryOperandNo(uint64_t TSFlags)
unsigned getOperandBias(const MCInstrDesc &Desc)
Compute whether all of the def operands are repeated in the uses and therefore should be skipped.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createX86AvoidStoreForwardingBlocksLegacyPass()
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
auto reverse(ContainerTy &&C)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.