LLVM 18.0.0git
X86FixupLEAs.cpp
Go to the documentation of this file.
1//===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the pass that finds instructions that can be
10// re-written as LEA instructions in order to reduce pipeline delays.
11// It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
12//
13//===----------------------------------------------------------------------===//
14
15#include "X86.h"
16#include "X86InstrInfo.h"
17#include "X86Subtarget.h"
18#include "llvm/ADT/Statistic.h"
24#include "llvm/CodeGen/Passes.h"
26#include "llvm/Support/Debug.h"
28using namespace llvm;
29
30#define FIXUPLEA_DESC "X86 LEA Fixup"
31#define FIXUPLEA_NAME "x86-fixup-LEAs"
32
33#define DEBUG_TYPE FIXUPLEA_NAME
34
35STATISTIC(NumLEAs, "Number of LEA instructions created");
36
37namespace {
38class FixupLEAPass : public MachineFunctionPass {
39 enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
40
41 /// Given a machine register, look for the instruction
42 /// which writes it in the current basic block. If found,
43 /// try to replace it with an equivalent LEA instruction.
44 /// If replacement succeeds, then also process the newly created
45 /// instruction.
46 void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
48
49 /// Given a memory access or LEA instruction
50 /// whose address mode uses a base and/or index register, look for
51 /// an opportunity to replace the instruction which sets the base or index
52 /// register with an equivalent LEA instruction.
53 void processInstruction(MachineBasicBlock::iterator &I,
55
56 /// Given a LEA instruction which is unprofitable
57 /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
58 void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
60
61 /// Given a LEA instruction which is unprofitable
62 /// on SNB+ try to replace it with other instructions.
63 /// According to Intel's Optimization Reference Manual:
64 /// " For LEA instructions with three source operands and some specific
65 /// situations, instruction latency has increased to 3 cycles, and must
66 /// dispatch via port 1:
67 /// - LEA that has all three source operands: base, index, and offset
68 /// - LEA that uses base and index registers where the base is EBP, RBP,
69 /// or R13
70 /// - LEA that uses RIP relative addressing mode
71 /// - LEA that uses 16-bit addressing mode "
72 /// This function currently handles the first 2 cases only.
73 void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
74 MachineBasicBlock &MBB, bool OptIncDec);
75
76 /// Look for LEAs that are really two address LEAs that we might be able to
77 /// turn into regular ADD instructions.
78 bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
79 MachineBasicBlock &MBB, bool OptIncDec,
80 bool UseLEAForSP) const;
81
82 /// Look for and transform the sequence
83 /// lea (reg1, reg2), reg3
84 /// sub reg3, reg4
85 /// to
86 /// sub reg1, reg4
87 /// sub reg2, reg4
88 /// It can also optimize the sequence lea/add similarly.
89 bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const;
90
91 /// Step forwards in MBB, looking for an ADD/SUB instruction which uses
92 /// the dest register of LEA instruction I.
94 MachineBasicBlock &MBB) const;
95
96 /// Check instructions between LeaI and AluI (exclusively).
97 /// Set BaseIndexDef to true if base or index register from LeaI is defined.
98 /// Set AluDestRef to true if the dest register of AluI is used or defined.
99 /// *KilledBase is set to the killed base register usage.
100 /// *KilledIndex is set to the killed index register usage.
101 void checkRegUsage(MachineBasicBlock::iterator &LeaI,
102 MachineBasicBlock::iterator &AluI, bool &BaseIndexDef,
103 bool &AluDestRef, MachineOperand **KilledBase,
104 MachineOperand **KilledIndex) const;
105
106 /// Determine if an instruction references a machine register
107 /// and, if so, whether it reads or writes the register.
108 RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);
109
110 /// Step backwards through a basic block, looking
111 /// for an instruction which writes a register within
112 /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
116
117 /// if an instruction can be converted to an
118 /// equivalent LEA, insert the new instruction into the basic block
119 /// and return a pointer to it. Otherwise, return zero.
120 MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
122
123public:
124 static char ID;
125
126 StringRef getPassName() const override { return FIXUPLEA_DESC; }
127
128 FixupLEAPass() : MachineFunctionPass(ID) { }
129
130 /// Loop over all of the basic blocks,
131 /// replacing instructions by equivalent LEA instructions
132 /// if needed and when possible.
133 bool runOnMachineFunction(MachineFunction &MF) override;
134
135 // This pass runs after regalloc and doesn't support VReg operands.
138 MachineFunctionProperties::Property::NoVRegs);
139 }
140
141 void getAnalysisUsage(AnalysisUsage &AU) const override {
145 }
146
147private:
149 const X86InstrInfo *TII = nullptr;
150 const X86RegisterInfo *TRI = nullptr;
151};
152}
153
154char FixupLEAPass::ID = 0;
155
156INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)
157
159FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
160 MachineBasicBlock::iterator &MBBI) const {
161 MachineInstr &MI = *MBBI;
162 switch (MI.getOpcode()) {
163 case X86::MOV32rr:
164 case X86::MOV64rr: {
165 const MachineOperand &Src = MI.getOperand(1);
166 const MachineOperand &Dest = MI.getOperand(0);
167 MachineInstr *NewMI =
168 BuildMI(MBB, MBBI, MI.getDebugLoc(),
169 TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
170 : X86::LEA64r))
171 .add(Dest)
172 .add(Src)
173 .addImm(1)
174 .addReg(0)
175 .addImm(0)
176 .addReg(0);
177 return NewMI;
178 }
179 }
180
181 if (!MI.isConvertibleTo3Addr())
182 return nullptr;
183
184 switch (MI.getOpcode()) {
185 default:
186 // Only convert instructions that we've verified are safe.
187 return nullptr;
188 case X86::ADD64ri32:
189 case X86::ADD64ri32_DB:
190 case X86::ADD32ri:
191 case X86::ADD32ri_DB:
192 if (!MI.getOperand(2).isImm()) {
193 // convertToThreeAddress will call getImm()
194 // which requires isImm() to be true
195 return nullptr;
196 }
197 break;
198 case X86::SHL64ri:
199 case X86::SHL32ri:
200 case X86::INC64r:
201 case X86::INC32r:
202 case X86::DEC64r:
203 case X86::DEC32r:
204 case X86::ADD64rr:
205 case X86::ADD64rr_DB:
206 case X86::ADD32rr:
207 case X86::ADD32rr_DB:
208 // These instructions are all fine to convert.
209 break;
210 }
211 return TII->convertToThreeAddress(MI, nullptr, nullptr);
212}
213
214FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
215
216static bool isLEA(unsigned Opcode) {
217 return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
218 Opcode == X86::LEA64_32r;
219}
220
221bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
222 if (skipFunction(MF.getFunction()))
223 return false;
224
226 bool IsSlowLEA = ST.slowLEA();
227 bool IsSlow3OpsLEA = ST.slow3OpsLEA();
228 bool LEAUsesAG = ST.leaUsesAG();
229
230 bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
231 bool UseLEAForSP = ST.useLeaForSP();
232
233 TSM.init(&ST);
234 TII = ST.getInstrInfo();
235 TRI = ST.getRegisterInfo();
236 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
237 auto *MBFI = (PSI && PSI->hasProfileSummary())
238 ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
239 : nullptr;
240
241 LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
242 for (MachineBasicBlock &MBB : MF) {
243 // First pass. Try to remove or optimize existing LEAs.
244 bool OptIncDecPerBB =
245 OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
246 for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
247 if (!isLEA(I->getOpcode()))
248 continue;
249
250 if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))
251 continue;
252
253 if (IsSlowLEA)
254 processInstructionForSlowLEA(I, MBB);
255 else if (IsSlow3OpsLEA)
256 processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);
257 }
258
259 // Second pass for creating LEAs. This may reverse some of the
260 // transformations above.
261 if (LEAUsesAG) {
262 for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
263 processInstruction(I, MBB);
264 }
265 }
266
267 LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);
268
269 return true;
270}
271
272FixupLEAPass::RegUsageState
273FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
274 RegUsageState RegUsage = RU_NotUsed;
275 MachineInstr &MI = *I;
276
277 for (const MachineOperand &MO : MI.operands()) {
278 if (MO.isReg() && MO.getReg() == p.getReg()) {
279 if (MO.isDef())
280 return RU_Write;
281 RegUsage = RU_Read;
282 }
283 }
284 return RegUsage;
285}
286
287/// getPreviousInstr - Given a reference to an instruction in a basic
288/// block, return a reference to the previous instruction in the block,
289/// wrapping around to the last instruction of the block if the block
290/// branches to itself.
293 if (I == MBB.begin()) {
294 if (MBB.isPredecessor(&MBB)) {
295 I = --MBB.end();
296 return true;
297 } else
298 return false;
299 }
300 --I;
301 return true;
302}
303
305FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
307 int InstrDistance = 1;
309 static const int INSTR_DISTANCE_THRESHOLD = 5;
310
311 CurInst = I;
312 bool Found;
313 Found = getPreviousInstr(CurInst, MBB);
314 while (Found && I != CurInst) {
315 if (CurInst->isCall() || CurInst->isInlineAsm())
316 break;
317 if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
318 break; // too far back to make a difference
319 if (usesRegister(p, CurInst) == RU_Write) {
320 return CurInst;
321 }
322 InstrDistance += TSM.computeInstrLatency(&*CurInst);
323 Found = getPreviousInstr(CurInst, MBB);
324 }
326}
327
328static inline bool isInefficientLEAReg(unsigned Reg) {
329 return Reg == X86::EBP || Reg == X86::RBP ||
330 Reg == X86::R13D || Reg == X86::R13;
331}
332
333/// Returns true if this LEA uses base an index registers, and the base register
334/// is known to be inefficient for the subtarget.
335// TODO: use a variant scheduling class to model the latency profile
336// of LEA instructions, and implement this logic as a scheduling predicate.
338 const MachineOperand &Index) {
339 return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
340 Index.getReg() != X86::NoRegister;
341}
342
343static inline bool hasLEAOffset(const MachineOperand &Offset) {
344 return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
345}
346
347static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
348 switch (LEAOpcode) {
349 default:
350 llvm_unreachable("Unexpected LEA instruction");
351 case X86::LEA32r:
352 case X86::LEA64_32r:
353 return X86::ADD32rr;
354 case X86::LEA64r:
355 return X86::ADD64rr;
356 }
357}
358
359static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) {
360 switch (LEAOpcode) {
361 default:
362 llvm_unreachable("Unexpected LEA instruction");
363 case X86::LEA32r:
364 case X86::LEA64_32r:
365 return X86::SUB32rr;
366 case X86::LEA64r:
367 return X86::SUB64rr;
368 }
369}
370
371static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
372 const MachineOperand &Offset) {
373 switch (LEAOpcode) {
374 default:
375 llvm_unreachable("Unexpected LEA instruction");
376 case X86::LEA32r:
377 case X86::LEA64_32r:
378 return X86::ADD32ri;
379 case X86::LEA64r:
380 return X86::ADD64ri32;
381 }
382}
383
384static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
385 switch (LEAOpcode) {
386 default:
387 llvm_unreachable("Unexpected LEA instruction");
388 case X86::LEA32r:
389 case X86::LEA64_32r:
390 return IsINC ? X86::INC32r : X86::DEC32r;
391 case X86::LEA64r:
392 return IsINC ? X86::INC64r : X86::DEC64r;
393 }
394}
395
397FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I,
398 MachineBasicBlock &MBB) const {
399 const int InstrDistanceThreshold = 5;
400 int InstrDistance = 1;
401 MachineBasicBlock::iterator CurInst = std::next(I);
402
403 unsigned LEAOpcode = I->getOpcode();
404 unsigned AddOpcode = getADDrrFromLEA(LEAOpcode);
405 unsigned SubOpcode = getSUBrrFromLEA(LEAOpcode);
406 Register DestReg = I->getOperand(0).getReg();
407
408 while (CurInst != MBB.end()) {
409 if (CurInst->isCall() || CurInst->isInlineAsm())
410 break;
411 if (InstrDistance > InstrDistanceThreshold)
412 break;
413
414 // Check if the lea dest register is used in an add/sub instruction only.
415 for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
416 MachineOperand &Opnd = CurInst->getOperand(I);
417 if (Opnd.isReg()) {
418 if (Opnd.getReg() == DestReg) {
419 if (Opnd.isDef() || !Opnd.isKill())
421
422 unsigned AluOpcode = CurInst->getOpcode();
423 if (AluOpcode != AddOpcode && AluOpcode != SubOpcode)
425
426 MachineOperand &Opnd2 = CurInst->getOperand(3 - I);
427 MachineOperand AluDest = CurInst->getOperand(0);
428 if (Opnd2.getReg() != AluDest.getReg())
430
431 // X - (Y + Z) may generate different flags than (X - Y) - Z when
432 // there is overflow. So we can't change the alu instruction if the
433 // flags register is live.
434 if (!CurInst->registerDefIsDead(X86::EFLAGS, TRI))
436
437 return CurInst;
438 }
439 if (TRI->regsOverlap(DestReg, Opnd.getReg()))
441 }
442 }
443
444 InstrDistance++;
445 ++CurInst;
446 }
448}
449
450void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,
452 bool &BaseIndexDef, bool &AluDestRef,
453 MachineOperand **KilledBase,
454 MachineOperand **KilledIndex) const {
455 BaseIndexDef = AluDestRef = false;
456 *KilledBase = *KilledIndex = nullptr;
457 Register BaseReg = LeaI->getOperand(1 + X86::AddrBaseReg).getReg();
458 Register IndexReg = LeaI->getOperand(1 + X86::AddrIndexReg).getReg();
459 Register AluDestReg = AluI->getOperand(0).getReg();
460
461 for (MachineInstr &CurInst : llvm::make_range(std::next(LeaI), AluI)) {
462 for (MachineOperand &Opnd : CurInst.operands()) {
463 if (!Opnd.isReg())
464 continue;
465 Register Reg = Opnd.getReg();
466 if (TRI->regsOverlap(Reg, AluDestReg))
467 AluDestRef = true;
468 if (TRI->regsOverlap(Reg, BaseReg)) {
469 if (Opnd.isDef())
470 BaseIndexDef = true;
471 else if (Opnd.isKill())
472 *KilledBase = &Opnd;
473 }
474 if (TRI->regsOverlap(Reg, IndexReg)) {
475 if (Opnd.isDef())
476 BaseIndexDef = true;
477 else if (Opnd.isKill())
478 *KilledIndex = &Opnd;
479 }
480 }
481 }
482}
483
484bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I,
485 MachineBasicBlock &MBB) const {
486 // Look for an add/sub instruction which uses the result of lea.
487 MachineBasicBlock::iterator AluI = searchALUInst(I, MBB);
488 if (AluI == MachineBasicBlock::iterator())
489 return false;
490
491 // Check if there are any related register usage between lea and alu.
492 bool BaseIndexDef, AluDestRef;
493 MachineOperand *KilledBase, *KilledIndex;
494 checkRegUsage(I, AluI, BaseIndexDef, AluDestRef, &KilledBase, &KilledIndex);
495
496 MachineBasicBlock::iterator InsertPos = AluI;
497 if (BaseIndexDef) {
498 if (AluDestRef)
499 return false;
500 InsertPos = I;
501 KilledBase = KilledIndex = nullptr;
502 }
503
504 // Check if there are same registers.
505 Register AluDestReg = AluI->getOperand(0).getReg();
506 Register BaseReg = I->getOperand(1 + X86::AddrBaseReg).getReg();
507 Register IndexReg = I->getOperand(1 + X86::AddrIndexReg).getReg();
508 if (I->getOpcode() == X86::LEA64_32r) {
509 BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
510 IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
511 }
512 if (AluDestReg == IndexReg) {
513 if (BaseReg == IndexReg)
514 return false;
515 std::swap(BaseReg, IndexReg);
516 std::swap(KilledBase, KilledIndex);
517 }
518 if (BaseReg == IndexReg)
519 KilledBase = nullptr;
520
521 // Now it's safe to change instructions.
522 MachineInstr *NewMI1, *NewMI2;
523 unsigned NewOpcode = AluI->getOpcode();
524 NewMI1 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
525 AluDestReg)
526 .addReg(AluDestReg, RegState::Kill)
527 .addReg(BaseReg, KilledBase ? RegState::Kill : 0);
528 NewMI1->addRegisterDead(X86::EFLAGS, TRI);
529 NewMI2 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),
530 AluDestReg)
531 .addReg(AluDestReg, RegState::Kill)
532 .addReg(IndexReg, KilledIndex ? RegState::Kill : 0);
533 NewMI2->addRegisterDead(X86::EFLAGS, TRI);
534
535 // Clear the old Kill flags.
536 if (KilledBase)
537 KilledBase->setIsKill(false);
538 if (KilledIndex)
539 KilledIndex->setIsKill(false);
540
541 MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1);
542 MBB.erase(I);
543 MBB.erase(AluI);
544 I = NewMI1;
545 return true;
546}
547
548bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
549 MachineBasicBlock &MBB, bool OptIncDec,
550 bool UseLEAForSP) const {
551 MachineInstr &MI = *I;
552
553 const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
554 const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
555 const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
556 const MachineOperand &Disp = MI.getOperand(1 + X86::AddrDisp);
557 const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
558
559 if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
560 MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I) !=
562 return false;
563
564 Register DestReg = MI.getOperand(0).getReg();
565 Register BaseReg = Base.getReg();
566 Register IndexReg = Index.getReg();
567
568 // Don't change stack adjustment LEAs.
569 if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
570 return false;
571
572 // LEA64_32 has 64-bit operands but 32-bit result.
573 if (MI.getOpcode() == X86::LEA64_32r) {
574 if (BaseReg != 0)
575 BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
576 if (IndexReg != 0)
577 IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
578 }
579
580 MachineInstr *NewMI = nullptr;
581
582 // Case 1.
583 // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
584 // which can be turned into add %reg2, %reg1
585 if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
586 (DestReg == BaseReg || DestReg == IndexReg)) {
587 unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
588 if (DestReg != BaseReg)
589 std::swap(BaseReg, IndexReg);
590
591 if (MI.getOpcode() == X86::LEA64_32r) {
592 // TODO: Do we need the super register implicit use?
593 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
594 .addReg(BaseReg).addReg(IndexReg)
595 .addReg(Base.getReg(), RegState::Implicit)
596 .addReg(Index.getReg(), RegState::Implicit);
597 } else {
598 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
599 .addReg(BaseReg).addReg(IndexReg);
600 }
601 } else if (DestReg == BaseReg && IndexReg == 0) {
602 // Case 2.
603 // This is an LEA with only a base register and a displacement,
604 // We can use ADDri or INC/DEC.
605
606 // Does this LEA have one these forms:
607 // lea %reg, 1(%reg)
608 // lea %reg, -1(%reg)
609 if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
610 bool IsINC = Disp.getImm() == 1;
611 unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);
612
613 if (MI.getOpcode() == X86::LEA64_32r) {
614 // TODO: Do we need the super register implicit use?
615 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
616 .addReg(BaseReg).addReg(Base.getReg(), RegState::Implicit);
617 } else {
618 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
619 .addReg(BaseReg);
620 }
621 } else {
622 unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
623 if (MI.getOpcode() == X86::LEA64_32r) {
624 // TODO: Do we need the super register implicit use?
625 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
626 .addReg(BaseReg).addImm(Disp.getImm())
627 .addReg(Base.getReg(), RegState::Implicit);
628 } else {
629 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
630 .addReg(BaseReg).addImm(Disp.getImm());
631 }
632 }
633 } else if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0) {
634 // Case 3.
635 // Look for and transform the sequence
636 // lea (reg1, reg2), reg3
637 // sub reg3, reg4
638 return optLEAALU(I, MBB);
639 } else
640 return false;
641
643 MBB.erase(I);
644 I = NewMI;
645 return true;
646}
647
648void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
650 // Process a load, store, or LEA instruction.
651 MachineInstr &MI = *I;
652 const MCInstrDesc &Desc = MI.getDesc();
653 int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);
654 if (AddrOffset >= 0) {
655 AddrOffset += X86II::getOperandBias(Desc);
656 MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
657 if (p.isReg() && p.getReg() != X86::ESP) {
658 seekLEAFixup(p, I, MBB);
659 }
660 MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
661 if (q.isReg() && q.getReg() != X86::ESP) {
662 seekLEAFixup(q, I, MBB);
663 }
664 }
665}
666
667void FixupLEAPass::seekLEAFixup(MachineOperand &p,
670 MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
671 if (MBI != MachineBasicBlock::iterator()) {
672 MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
673 if (NewMI) {
674 ++NumLEAs;
675 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
676 // now to replace with an equivalent LEA...
677 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
678 MBB.getParent()->substituteDebugValuesForInst(*MBI, *NewMI, 1);
679 MBB.erase(MBI);
681 static_cast<MachineBasicBlock::iterator>(NewMI);
682 processInstruction(J, MBB);
683 }
684 }
685}
686
687void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
689 MachineInstr &MI = *I;
690 const unsigned Opcode = MI.getOpcode();
691
692 const MachineOperand &Dst = MI.getOperand(0);
693 const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
694 const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
695 const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
696 const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
697 const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
698
699 if (Segment.getReg() != 0 || !Offset.isImm() ||
700 MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
702 return;
703 const Register DstR = Dst.getReg();
704 const Register SrcR1 = Base.getReg();
705 const Register SrcR2 = Index.getReg();
706 if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
707 return;
708 if (Scale.getImm() > 1)
709 return;
710 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
711 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
712 MachineInstr *NewMI = nullptr;
713 // Make ADD instruction for two registers writing to LEA's destination
714 if (SrcR1 != 0 && SrcR2 != 0) {
715 const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
716 const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
717 NewMI =
718 BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
719 LLVM_DEBUG(NewMI->dump(););
720 }
721 // Make ADD instruction for immediate
722 if (Offset.getImm() != 0) {
723 const MCInstrDesc &ADDri =
724 TII->get(getADDriFromLEA(Opcode, Offset));
725 const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
726 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
727 .add(SrcR)
728 .addImm(Offset.getImm());
729 LLVM_DEBUG(NewMI->dump(););
730 }
731 if (NewMI) {
733 MBB.erase(I);
734 I = NewMI;
735 }
736}
737
738void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
740 bool OptIncDec) {
741 MachineInstr &MI = *I;
742 const unsigned LEAOpcode = MI.getOpcode();
743
744 const MachineOperand &Dest = MI.getOperand(0);
745 const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
746 const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
747 const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
748 const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
749 const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
750
751 if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
752 MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
754 Segment.getReg() != X86::NoRegister)
755 return;
756
757 Register DestReg = Dest.getReg();
758 Register BaseReg = Base.getReg();
759 Register IndexReg = Index.getReg();
760
761 if (MI.getOpcode() == X86::LEA64_32r) {
762 if (BaseReg != 0)
763 BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
764 if (IndexReg != 0)
765 IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
766 }
767
768 bool IsScale1 = Scale.getImm() == 1;
769 bool IsInefficientBase = isInefficientLEAReg(BaseReg);
770 bool IsInefficientIndex = isInefficientLEAReg(IndexReg);
771
772 // Skip these cases since it takes more than 2 instructions
773 // to replace the LEA instruction.
774 if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
775 return;
776
777 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
778 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
779
780 MachineInstr *NewMI = nullptr;
781 bool BaseOrIndexIsDst = DestReg == BaseReg || DestReg == IndexReg;
782 // First try and remove the base while sticking with LEA iff base == index and
783 // scale == 1. We can handle:
784 // 1. lea D(%base,%index,1) -> lea D(,%index,2)
785 // 2. lea D(%r13/%rbp,%index) -> lea D(,%index,2)
786 // Only do this if the LEA would otherwise be split into 2-instruction
787 // (either it has a an Offset or neither base nor index are dst)
788 if (IsScale1 && BaseReg == IndexReg &&
789 (hasLEAOffset(Offset) || (IsInefficientBase && !BaseOrIndexIsDst))) {
790 NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
791 .add(Dest)
792 .addReg(0)
793 .addImm(2)
794 .add(Index)
795 .add(Offset)
796 .add(Segment);
797 LLVM_DEBUG(NewMI->dump(););
798
800 MBB.erase(I);
801 I = NewMI;
802 return;
803 } else if (IsScale1 && BaseOrIndexIsDst) {
804 // Try to replace LEA with one or two (for the 3-op LEA case)
805 // add instructions:
806 // 1.lea (%base,%index,1), %base => add %index,%base
807 // 2.lea (%base,%index,1), %index => add %base,%index
808
809 unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
810 if (DestReg != BaseReg)
811 std::swap(BaseReg, IndexReg);
812
813 if (MI.getOpcode() == X86::LEA64_32r) {
814 // TODO: Do we need the super register implicit use?
815 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
816 .addReg(BaseReg)
817 .addReg(IndexReg)
818 .addReg(Base.getReg(), RegState::Implicit)
819 .addReg(Index.getReg(), RegState::Implicit);
820 } else {
821 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
822 .addReg(BaseReg)
823 .addReg(IndexReg);
824 }
825 } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
826 // If the base is inefficient try switching the index and base operands,
827 // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
828 // lea offset(%base,%index,scale),%dst =>
829 // lea (%base,%index,scale); add offset,%dst
830 NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
831 .add(Dest)
832 .add(IsInefficientBase ? Index : Base)
833 .add(Scale)
834 .add(IsInefficientBase ? Base : Index)
835 .addImm(0)
836 .add(Segment);
837 LLVM_DEBUG(NewMI->dump(););
838 }
839
840 // If either replacement succeeded above, add the offset if needed, then
841 // replace the instruction.
842 if (NewMI) {
843 // Create ADD instruction for the Offset in case of 3-Ops LEA.
844 if (hasLEAOffset(Offset)) {
845 if (OptIncDec && Offset.isImm() &&
846 (Offset.getImm() == 1 || Offset.getImm() == -1)) {
847 unsigned NewOpc =
848 getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
849 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
850 .addReg(DestReg);
851 LLVM_DEBUG(NewMI->dump(););
852 } else {
853 unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
854 NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
855 .addReg(DestReg)
856 .add(Offset);
857 LLVM_DEBUG(NewMI->dump(););
858 }
859 }
860
862 MBB.erase(I);
863 I = NewMI;
864 return;
865 }
866
867 // Handle the rest of the cases with inefficient base register:
868 assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
869 assert(IsInefficientBase && "efficient base should be handled already!");
870
871 // FIXME: Handle LEA64_32r.
872 if (LEAOpcode == X86::LEA64_32r)
873 return;
874
875 // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
876 if (IsScale1 && !hasLEAOffset(Offset)) {
877 bool BIK = Base.isKill() && BaseReg != IndexReg;
878 TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
879 LLVM_DEBUG(MI.getPrevNode()->dump(););
880
881 unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
882 NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
883 .addReg(DestReg)
884 .add(Index);
885 LLVM_DEBUG(NewMI->dump(););
886
888 MBB.erase(I);
889 I = NewMI;
890 return;
891 }
892
893 // lea offset(%base,%index,scale), %dst =>
894 // lea offset( ,%index,scale), %dst; add %base,%dst
895 NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
896 .add(Dest)
897 .addReg(0)
898 .add(Scale)
899 .add(Index)
900 .add(Offset)
901 .add(Segment);
902 LLVM_DEBUG(NewMI->dump(););
903
904 unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
905 NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
906 .addReg(DestReg)
907 .add(Base);
908 LLVM_DEBUG(NewMI->dump(););
909
911 MBB.erase(I);
912 I = NewMI;
913}
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
===- LazyMachineBlockFrequencyInfo.h - Lazy Block Frequency -*- C++ -*–===//
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC)
static bool isLEA(unsigned Opcode)
static bool hasLEAOffset(const MachineOperand &Offset)
static bool hasInefficientLEABaseReg(const MachineOperand &Base, const MachineOperand &Index)
Returns true if this LEA uses base an index registers, and the base register is known to be inefficie...
static unsigned getADDriFromLEA(unsigned LEAOpcode, const MachineOperand &Offset)
static bool getPreviousInstr(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB)
getPreviousInstr - Given a reference to an instruction in a basic block, return a reference to the pr...
#define FIXUPLEA_DESC
static unsigned getADDrrFromLEA(unsigned LEAOpcode)
#define FIXUPLEA_NAME
static bool isInefficientLEAReg(unsigned Reg)
static unsigned getSUBrrFromLEA(unsigned LEAOpcode)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:647
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override
Emit instructions to copy a pair of physical registers.
This is an alternative analysis pass to MachineBlockFrequencyInfo.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
bool isPredecessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a predecessor of this block.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
void substituteDebugValuesForInst(const MachineInstr &Old, MachineInstr &New, unsigned MaxOperand=UINT_MAX)
Create substitutions for any tracked values in Old, to point at New.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Definition: MachineInstr.h:68
bool addRegisterDead(Register Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI defined a register without a use.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
void dump() const
Definition: Pass.cpp:136
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Provide an instruction scheduling machine model to CodeGen passes.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
int getMemoryOperandNo(uint64_t TSFlags)
The function returns the MCInst operand # for the first field of the memory operand.
Definition: X86BaseInfo.h:1095
unsigned getOperandBias(const MCInstrDesc &Desc)
Compute whether all of the def operands are repeated in the uses and therefore should be skipped.
Definition: X86BaseInfo.h:1055
@ AddrScaleAmt
Definition: X86BaseInfo.h:33
@ AddrSegmentReg
AddrSegmentReg - The operand # of the segment in the memory operand.
Definition: X86BaseInfo.h:38
@ AddrIndexReg
Definition: X86BaseInfo.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:440
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
FunctionPass * createX86FixupLEAs()
Return a pass that selectively replaces certain instructions (like add, sub, inc, dec,...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
Description of the encoding of one expression Op.