LLVM 20.0.0git
Thumb2SizeReduction.cpp
Go to the documentation of this file.
1//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ARM.h"
10#include "ARMBaseInstrInfo.h"
11#include "ARMSubtarget.h"
13#include "Thumb2InstrInfo.h"
14#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/SmallSet.h"
19#include "llvm/ADT/Statistic.h"
20#include "llvm/ADT/StringRef.h"
28#include "llvm/IR/DebugLoc.h"
29#include "llvm/IR/Function.h"
30#include "llvm/MC/MCAsmInfo.h"
31#include "llvm/MC/MCInstrDesc.h"
35#include "llvm/Support/Debug.h"
38#include <algorithm>
39#include <cassert>
40#include <cstdint>
41#include <functional>
42#include <iterator>
43#include <utility>
44
45using namespace llvm;
46
47#define DEBUG_TYPE "thumb2-reduce-size"
48#define THUMB2_SIZE_REDUCE_NAME "Thumb2 instruction size reduce pass"
49
// Counters for the three kinds of successful reduction. Each one is bumped by
// the routine that performs that kind of reduction (ReduceToNarrow and the
// tADDrSPi path of ReduceSpecial, ReduceTo2Addr, ReduceLoadStore).
50STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
51STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones");
52STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones");
53
// Hidden command-line limits, one per counter above. The default of -1 means
// "no limit"; otherwise the matching Reduce* routine refuses to transform once
// its counter has reached the given value.
54static cl::opt<int> ReduceLimit("t2-reduce-limit",
55 cl::init(-1), cl::Hidden);
56static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
57 cl::init(-1), cl::Hidden);
58static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
59 cl::init(-1), cl::Hidden);
60
61namespace {
62
63 /// ReduceEntry - One row of ReduceTable, the static table with information
64 /// on mapping from wide (32-bit) opcodes to narrow (16-bit) opcodes.
 /// The field order is significant: the rows of ReduceTable are
 /// brace-initialized positionally.
65 struct ReduceEntry {
66 uint16_t WideOpc; // Wide opcode
67 uint16_t NarrowOpc1; // Narrow opcode to transform to
68 uint16_t NarrowOpc2; // Narrow opcode when it's two-address
69 uint8_t Imm1Limit; // Limit of immediate field (bits); max value is
 // (1 << Imm1Limit) - 1
70 uint8_t Imm2Limit; // Limit of immediate field when it's two-address
71 unsigned LowRegs1 : 1; // Only possible if low-registers are used
72 unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
73 unsigned PredCC1 : 2; // 0 - CPSR is set exactly when not predicated
 // (predicated => no CPSR def, and vice versa).
74 // 1 - No cc field.
75 // 2 - Always set CPSR.
 // Same encoding as PredCC1, consulted for the two-address form.
76 unsigned PredCC2 : 2;
77 unsigned PartFlag : 1; // 16-bit instruction does partial flag update
78 unsigned Special : 1; // Needs to be dealt with specially
79 unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift)
80 };
81
 // ReduceTable - Maps each reducible wide opcode to its narrow form(s).
 // Columns follow the ReduceEntry field order:
 //   WideOpc, NarrowOpc1, NarrowOpc2, Imm1Limit, Imm2Limit, LowRegs1,
 //   LowRegs2, PredCC1, PredCC2, PartFlag, Special, AvoidMovs.
 // A narrow opcode of 0 means that form is not available. Each wide opcode
 // must appear at most once; the pass constructor indexes this table by
 // WideOpc and treats a duplicate as unreachable.
82 static const ReduceEntry ReduceTable[] = {
83 // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C,PF,S,AM
84 { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0,0 },
85 { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1,0 },
86 { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0,0 },
87 { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1,0 },
88 { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
89 { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0,0 },
90 { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
91 { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
92 { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0,0 },
93 //FIXME: Disable CMN, as CCodes are backwards from compare expectations
94 //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
95 { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
96 { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0,0 },
97 { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1,0 },
98 { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0,0 },
99 // FIXME: adr.n immediate offset must be multiple of 4.
100 //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
101 { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
102 { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0,1 },
103 { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
104 { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
105 { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,0,0 },
106 { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,1,0 },
107 // FIXME: Do we need the 16-bit 'S' variant?
108 { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0,0 },
109 { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0,0 },
110 { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
111 { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0,0 },
112 { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
113 { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
114 { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
115 { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0,0 },
116 { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
117 { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
118 { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0,0 },
119 { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0,0 },
120 { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
121 { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0,0 },
122 { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
123 { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
124 { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
125 { ARM::t2TEQrr, ARM::tEOR, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
126 { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
127 { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
128 { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
129
130 // FIXME: Clean this up after splitting each Thumb load / store opcode
131 // into multiple ones.
132 { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
133 { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
134 { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
135 { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
136 { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
137 { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
138 { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
139 { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
140 { ARM::t2LDR_POST,ARM::tLDMIA_UPD,0, 0, 0, 1, 0, 0,0, 0,1,0 },
141 { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
142 { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
143 { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
144 { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
145 { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
146 { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
147 { ARM::t2STR_POST,ARM::tSTMIA_UPD,0, 0, 0, 1, 0, 0,0, 0,1,0 },
148
149 { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
150 { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 },
151 { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 },
152 // ARM::t2STMIA (with no basereg writeback) has no Thumb1 equivalent.
153 // tSTMIA_UPD is a change in semantics which can only be used if the base
154 // register is killed. This difference is correctly handled elsewhere.
155 { ARM::t2STMIA, ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
156 { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
157 { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 }
158 };
159
 /// Thumb2SizeReduce - Machine function pass that rewrites 32-bit Thumb2
 /// instructions into 16-bit equivalents where the ReduceTable entry and the
 /// operand / predicate / CPSR constraints allow it.
 /// NOTE(review): some lines of this class (the opening of the
 /// required-properties override and the body of getPassName) are absent
 /// from this listing - confirm against the upstream file.
160 class Thumb2SizeReduce : public MachineFunctionPass {
161 public:
162 static char ID;
163
 // Target hooks used by the Reduce* helpers (TII->get, STI->avoid*).
 // They are not initialized by the constructor.
164 const Thumb2InstrInfo *TII;
165 const ARMSubtarget *STI;
166
 /// \p Ftor is stored in PredicateFtor; it defaults to an empty function.
167 Thumb2SizeReduce(std::function<bool(const Function &)> Ftor = nullptr);
168
169 bool runOnMachineFunction(MachineFunction &MF) override;
170
173 MachineFunctionProperties::Property::NoVRegs);
174 }
175
176 StringRef getPassName() const override {
178 }
179
180 private:
181 /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
182 DenseMap<unsigned, unsigned> ReduceOpcodeMap;
183
 /// Returns true when narrowing \p Use would add an unwanted false
 /// dependency on the last CPSR def (the definition names the second
 /// parameter FirstInSelfLoop).
184 bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop);
185
 /// Checks the predicate / CPSR-def requirements of \p Entry; may update
 /// \p HasCC and \p CCDead.
186 bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
187 bool is2Addr, ARMCC::CondCodes Pred,
188 bool LiveCPSR, bool &HasCC, bool &CCDead);
189
 /// Attempts to narrow a load / store (including load / store multiple).
190 bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
191 const ReduceEntry &Entry);
192
 /// Handles table entries flagged as Special.
193 bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
194 const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop);
195
196 /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
197 /// instruction.
198 bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
199 const ReduceEntry &Entry, bool LiveCPSR,
200 bool IsSelfLoop);
201
202 /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
203 /// non-two-address instruction.
204 bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
205 const ReduceEntry &Entry, bool LiveCPSR,
206 bool IsSelfLoop);
207
208 /// ReduceMI - Attempt to reduce MI, return true on success.
209 bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, bool LiveCPSR,
210 bool IsSelfLoop, bool SkipPrologueEpilogue);
211
212 /// ReduceMBB - Reduce width of instructions in the specified basic block.
213 bool ReduceMBB(MachineBasicBlock &MBB, bool SkipPrologueEpilogue);
214
 // Size-optimization modes; both are set to false by the constructor.
 // MinimizeSize disables the partial-flag-update check and enables the
 // post-indexed load / store reduction; OptimizeSize overrides AvoidMovs.
215 bool OptimizeSize;
216 bool MinimizeSize;
217
218 // Last instruction to define CPSR in the current block.
219 MachineInstr *CPSRDef;
220 // Was CPSR last defined by a high latency instruction?
221 // When CPSRDef is null, this refers to CPSR defs in predecessors.
222 bool HighLatencyCPSR;
223
 // Per-basic-block bookkeeping, stored in BlockInfo.
224 struct MBBInfo {
225 // The flags leaving this block have high latency.
226 bool HighLatencyCPSR = false;
227 // Has this block been visited yet?
228 bool Visited = false;
229
230 MBBInfo() = default;
231 };
232
233 SmallVector<MBBInfo, 8> BlockInfo;
234
 // Function predicate supplied at construction time.
235 std::function<bool(const Function &)> PredicateFtor;
236 };
237
 // Pass identification object; the constructor passes it to
 // MachineFunctionPass.
238 char Thumb2SizeReduce::ID = 0;
239
240} // end anonymous namespace
241
243 false)
244
245Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
246 : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
247 OptimizeSize = MinimizeSize = false;
248 for (unsigned i = 0, e = std::size(ReduceTable); i != e; ++i) {
249 unsigned FromOpc = ReduceTable[i].WideOpc;
250 if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
251 llvm_unreachable("Duplicated entries?");
252 }
253}
254
255static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
256 return is_contained(MCID.implicit_defs(), ARM::CPSR);
257}
258
259// Check for a likely high-latency flag def.
// Returns true when the opcode of Def is FMSTAT or tMUL, false for every
// other opcode.
// NOTE(review): the signature line of this function is absent from this
// listing - confirm its name and parameter type against the upstream file.
261 switch(Def->getOpcode()) {
262 case ARM::FMSTAT:
263 case ARM::tMUL:
264 return true;
265 }
266 return false;
267}
268
269/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
270/// the 's' 16-bit instructions only partially update CPSR. Abort the
271/// transformation to avoid adding a false dependency on the last CPSR-setting
272/// instruction, which hurts the ability of the out-of-order execution engine
273/// to do register renaming magic.
274/// This function checks if there is a read-after-write dependency between the
275/// last instruction that defines the CPSR and the current instruction. If there
276/// is, then there is no harm done since the instruction cannot be retired
277/// before the CPSR setting instruction anyway.
278/// Note, we are not doing full dependency analysis here for the sake of compile
279/// time. We're not looking for cases like:
280/// r0 = muls ...
281/// r1 = add.w r0, ...
282/// ...
283/// = mul.w r1
284/// In this case it would have been ok to narrow the mul.w to muls since there
285/// is an indirect RAW dependency between the muls and the mul.w.
///
/// Returns true when narrowing \p Use should be avoided, false when it is
/// fine to narrow. Callers pass their IsSelfLoop flag as \p FirstInSelfLoop.
/// NOTE(review): the declaration of the register set `Defs` used below is
/// absent from this listing - confirm against the upstream file.
286bool
287Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
288 // Disable the check for -Oz (aka OptimizeForSizeHarder).
 // Also nothing to avoid when the subtarget does not ask for it.
289 if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
290 return false;
291
 // No CPSR def seen yet in this block: rely on the predecessor information.
292 if (!CPSRDef)
293 // If this BB loops back to itself, conservatively avoid narrowing the
294 // first instruction that does partial flag update.
295 return HighLatencyCPSR || FirstInSelfLoop;
296
 // Collect the non-CPSR registers defined by the last CPSR-defining
 // instruction.
298 for (const MachineOperand &MO : CPSRDef->operands()) {
299 if (!MO.isReg() || MO.isUndef() || MO.isUse())
300 continue;
301 Register Reg = MO.getReg();
302 if (Reg == 0 || Reg == ARM::CPSR)
303 continue;
304 Defs.insert(Reg);
305 }
306
 // If Use reads any of them there is already a true dependency, so the
 // extra flag dependency costs nothing.
307 for (const MachineOperand &MO : Use->operands()) {
308 if (!MO.isReg() || MO.isUndef() || MO.isDef())
309 continue;
310 Register Reg = MO.getReg();
311 if (Defs.count(Reg))
312 return false;
313 }
314
315 // If the current CPSR has high latency, try to avoid the false dependency.
316 if (HighLatencyCPSR)
317 return true;
318
319 // tMOVi8 usually doesn't start long dependency chains, and there are a lot
320 // of them, so always shrink them when CPSR doesn't have high latency.
 // (t2MOVi and t2MOVi16 are the wide opcodes that map to tMOVi8.)
321 if (Use->getOpcode() == ARM::t2MOVi ||
322 Use->getOpcode() == ARM::t2MOVi16)
323 return false;
324
325 // No read-after-write dependency. The narrowing will add false dependency.
326 return true;
327}
328
329bool
330Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
331 bool is2Addr, ARMCC::CondCodes Pred,
332 bool LiveCPSR, bool &HasCC, bool &CCDead) {
333 if ((is2Addr && Entry.PredCC2 == 0) ||
334 (!is2Addr && Entry.PredCC1 == 0)) {
335 if (Pred == ARMCC::AL) {
336 // Not predicated, must set CPSR.
337 if (!HasCC) {
338 // Original instruction was not setting CPSR, but CPSR is not
339 // currently live anyway. It's ok to set it. The CPSR def is
340 // dead though.
341 if (!LiveCPSR) {
342 HasCC = true;
343 CCDead = true;
344 return true;
345 }
346 return false;
347 }
348 } else {
349 // Predicated, must not set CPSR.
350 if (HasCC)
351 return false;
352 }
353 } else if ((is2Addr && Entry.PredCC2 == 2) ||
354 (!is2Addr && Entry.PredCC1 == 2)) {
355 /// Old opcode has an optional def of CPSR.
356 if (HasCC)
357 return true;
358 // If old opcode does not implicitly define CPSR, then it's not ok since
359 // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP.
360 if (!HasImplicitCPSRDef(MI->getDesc()))
361 return false;
362 HasCC = true;
363 } else {
364 // 16-bit instruction does not set CPSR.
365 if (HasCC)
366 return false;
367 }
368
369 return true;
370}
371
// Returns true if every explicit register operand of MI is acceptable for a
// narrow encoding: a low register, or one of the opcode-specific exceptions
// below. The null register and CPSR are ignored.
// NOTE(review): the signature line of this function is absent from this
// listing; ReduceSpecial calls it as VerifyLowRegs(MI).
373 unsigned Opc = MI->getOpcode();
 // PC may appear in the register list of these loads, LR in this store,
 // and SP is allowed for any of the three.
374 bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA_UPD);
375 bool isLROk = (Opc == ARM::t2STMDB_UPD);
376 bool isSPOk = isPCOk || isLROk;
377 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
378 const MachineOperand &MO = MI->getOperand(i);
379 if (!MO.isReg() || MO.isImplicit())
380 continue;
381 Register Reg = MO.getReg();
382 if (Reg == 0 || Reg == ARM::CPSR)
383 continue;
384 if (isPCOk && Reg == ARM::PC)
385 continue;
386 if (isLROk && Reg == ARM::LR)
387 continue;
388 if (Reg == ARM::SP) {
389 if (isSPOk)
390 continue;
391 if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
392 // Special case for these ldr / str with sp as base register.
393 continue;
394 }
 // Any other SP use falls through to the low-register test.
395 if (!isARMLowRegister(Reg))
396 return false;
397 }
398 return true;
399}
400
/// ReduceLoadStore - Try to replace the 32-bit load / store (or load / store
/// multiple) \p MI with the narrow opcode given by \p Entry.
///
/// Returns true and increments NumLdSts when a 16-bit instruction was built
/// before \p MI in \p MBB; returns false when the -t2-reduce-limit3 cap has
/// been reached or an addressing-mode / operand constraint is not met.
/// NOTE(review): a few lines of this function are absent from this listing
/// (after the PredReg read in the post-indexed case, the first operand of
/// that case's BuildMI chain, and one line just before the final counter
/// increment) - confirm against the upstream file.
401bool
402Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
403 const ReduceEntry &Entry) {
404 if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
405 return false;
406
 // Defaults describe a register-offset access using NarrowOpc1; the switch
 // below adjusts them per wide opcode.
407 unsigned Scale = 1;
408 bool HasImmOffset = false;
409 bool HasShift = false;
410 bool HasOffReg = true;
411 bool isLdStMul = false;
412 unsigned Opc = Entry.NarrowOpc1;
413 unsigned OpNum = 3; // First 'rest' of operands.
414 uint8_t ImmLimit = Entry.Imm1Limit;
415
416 switch (Entry.WideOpc) {
417 default:
418 llvm_unreachable("Unexpected Thumb2 load / store opcode!");
419 case ARM::t2LDRi12:
420 case ARM::t2STRi12:
 // SP-relative word accesses use the second narrow opcode and its limit.
421 if (MI->getOperand(1).getReg() == ARM::SP) {
422 Opc = Entry.NarrowOpc2;
423 ImmLimit = Entry.Imm2Limit;
424 }
425
426 Scale = 4;
427 HasImmOffset = true;
428 HasOffReg = false;
429 break;
430 case ARM::t2LDRBi12:
431 case ARM::t2STRBi12:
432 HasImmOffset = true;
433 HasOffReg = false;
434 break;
435 case ARM::t2LDRHi12:
436 case ARM::t2STRHi12:
437 Scale = 2;
438 HasImmOffset = true;
439 HasOffReg = false;
440 break;
441 case ARM::t2LDRs:
442 case ARM::t2LDRBs:
443 case ARM::t2LDRHs:
444 case ARM::t2LDRSBs:
445 case ARM::t2LDRSHs:
446 case ARM::t2STRs:
447 case ARM::t2STRBs:
448 case ARM::t2STRHs:
449 HasShift = true;
450 OpNum = 4;
451 break;
452 case ARM::t2LDR_POST:
453 case ARM::t2STR_POST: {
 // Only attempted when minimizing size.
454 if (!MinimizeSize)
455 return false;
456
 // Requires exactly one memory operand with at least 4-byte alignment.
457 if (!MI->hasOneMemOperand() ||
458 (*MI->memoperands_begin())->getAlign() < Align(4))
459 return false;
460
461 // We're creating a completely different type of load/store - LDM from LDR.
462 // For this reason we can't reuse the logic at the end of this function; we
463 // have to implement the MI building here.
464 bool IsStore = Entry.WideOpc == ARM::t2STR_POST;
465 Register Rt = MI->getOperand(IsStore ? 1 : 0).getReg();
466 Register Rn = MI->getOperand(IsStore ? 0 : 1).getReg();
467 unsigned Offset = MI->getOperand(3).getImm();
468 unsigned PredImm = MI->getOperand(4).getImm();
469 Register PredReg = MI->getOperand(5).getReg();
472
 // Only a post-increment of exactly 4 is converted.
473 if (Offset != 4)
474 return false;
475
476 // Add the 16-bit load / store instruction.
477 DebugLoc dl = MI->getDebugLoc();
478 auto MIB = BuildMI(MBB, MI, dl, TII->get(Entry.NarrowOpc1))
480 .addReg(Rn)
481 .addImm(PredImm)
482 .addReg(PredReg)
483 .addReg(Rt, IsStore ? 0 : RegState::Define);
484
485 // Transfer memoperands.
486 MIB.setMemRefs(MI->memoperands());
487
488 // Transfer MI flags.
489 MIB.setMIFlags(MI->getFlags());
490
491 // Kill the old instruction.
492 MI->eraseFromBundle();
493 ++NumLdSts;
494 return true;
495 }
496 case ARM::t2LDMIA: {
497 Register BaseReg = MI->getOperand(0).getReg();
498 assert(isARMLowRegister(BaseReg));
499
500 // For the non-writeback version (this one), the base register must be
501 // one of the registers being loaded.
502 bool isOK = false;
503 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 3)) {
504 if (MO.getReg() == BaseReg) {
505 isOK = true;
506 break;
507 }
508 }
509
510 if (!isOK)
511 return false;
512
513 OpNum = 0;
514 isLdStMul = true;
515 break;
516 }
517 case ARM::t2STMIA: {
518 // t2STMIA is reduced to tSTMIA_UPD which has writeback. We can only do this
519 // if the base register is killed, as then it doesn't matter what its value
520 // is after the instruction.
521 if (!MI->getOperand(0).isKill())
522 return false;
523
524 // If the base register is in the register list and isn't the lowest
525 // numbered register (i.e. it's in operand 4 onwards) then with writeback
526 // the stored value is unknown, so we can't convert to tSTMIA_UPD.
527 Register BaseReg = MI->getOperand(0).getReg();
528 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 4))
529 if (MO.getReg() == BaseReg)
530 return false;
531
 // isLdStMul is left false here, so operands 0 and 1 are re-added below
 // after the dead writeback def.
532 break;
533 }
534 case ARM::t2LDMIA_RET: {
535 Register BaseReg = MI->getOperand(1).getReg();
536 if (BaseReg != ARM::SP)
537 return false;
538 Opc = Entry.NarrowOpc2; // tPOP_RET
539 OpNum = 2;
540 isLdStMul = true;
541 break;
542 }
543 case ARM::t2LDMIA_UPD:
544 case ARM::t2STMIA_UPD:
545 case ARM::t2STMDB_UPD: {
546 OpNum = 0;
547
548 Register BaseReg = MI->getOperand(1).getReg();
549 if (BaseReg == ARM::SP &&
550 (Entry.WideOpc == ARM::t2LDMIA_UPD ||
551 Entry.WideOpc == ARM::t2STMDB_UPD)) {
552 Opc = Entry.NarrowOpc2; // tPOP or tPUSH
553 OpNum = 2;
554 } else if (!isARMLowRegister(BaseReg) ||
555 (Entry.WideOpc != ARM::t2LDMIA_UPD &&
556 Entry.WideOpc != ARM::t2STMIA_UPD)) {
 // Non-SP base: needs a low base register, and t2STMDB_UPD has no
 // such narrow form.
557 return false;
558 }
559
560 isLdStMul = true;
561 break;
562 }
563 }
564
565 unsigned OffsetReg = 0;
566 bool OffsetKill = false;
567 bool OffsetInternal = false;
568 if (HasShift) {
569 OffsetReg = MI->getOperand(2).getReg();
570 OffsetKill = MI->getOperand(2).isKill();
571 OffsetInternal = MI->getOperand(2).isInternalRead();
572
573 if (MI->getOperand(3).getImm())
574 // Thumb1 addressing mode doesn't support shift.
575 return false;
576 }
577
578 unsigned OffsetImm = 0;
579 if (HasImmOffset) {
580 OffsetImm = MI->getOperand(2).getImm();
 // Largest byte offset encodable: ImmLimit bits, scaled by access size.
581 unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;
582
 // The offset must be a multiple of Scale and within range.
583 if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
584 // Make sure the immediate field fits.
585 return false;
586 }
587
588 // Add the 16-bit load / store instruction.
589 DebugLoc dl = MI->getDebugLoc();
590 MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
591
592 // tSTMIA_UPD takes a defining register operand. We've already checked that
593 // the register is killed, so mark it as dead here.
594 if (Entry.WideOpc == ARM::t2STMIA)
595 MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead);
596
597 if (!isLdStMul) {
598 MIB.add(MI->getOperand(0));
599 MIB.add(MI->getOperand(1));
600
 // The narrow encoding stores the offset divided by the access size.
601 if (HasImmOffset)
602 MIB.addImm(OffsetImm / Scale);
603
604 assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
605
606 if (HasOffReg)
607 MIB.addReg(OffsetReg, getKillRegState(OffsetKill) |
608 getInternalReadRegState(OffsetInternal));
609 }
610
611 // Transfer the rest of operands.
612 for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), OpNum))
613 MIB.add(MO);
614
615 // Transfer memoperands.
616 MIB.setMemRefs(MI->memoperands());
617
618 // Transfer MI flags.
619 MIB.setMIFlags(MI->getFlags());
620
621 LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
622 << " to 16-bit: " << *MIB);
623
625 ++NumLdSts;
626 return true;
627}
628
/// ReduceSpecial - Handle instructions that need opcode-specific checks
/// before they can be narrowed: t2ADDri (including the SP-relative
/// tADDrSPi form), loads / stores, and the opcodes in the switch below.
/// Returns true if \p MI was reduced.
/// NOTE(review): in the tADDrSPi path the declaration of `MIB`, the end of
/// the BuildMI chain and one line before the counter increment are absent
/// from this listing - confirm against the upstream file.
629bool
630Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
631 const ReduceEntry &Entry,
632 bool LiveCPSR, bool IsSelfLoop) {
633 unsigned Opc = MI->getOpcode();
634 if (Opc == ARM::t2ADDri) {
635 // If the source register is SP, try to reduce to tADDrSPi, otherwise
636 // it's a normal reduce.
637 if (MI->getOperand(1).getReg() != ARM::SP) {
 // Prefer the two-address form, then fall back to the plain narrow form.
638 if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
639 return true;
640 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
641 }
642 // Try to reduce to tADDrSPi.
643 unsigned Imm = MI->getOperand(2).getImm();
644 // The immediate must be in range, the destination register must be a low
645 // reg, the predicate must be "always" and the condition flags must not
646 // be being set.
 // In range means a multiple of 4 no larger than 1020.
647 if (Imm & 3 || Imm > 1020)
648 return false;
649 if (!isARMLowRegister(MI->getOperand(0).getReg()))
650 return false;
651 if (MI->getOperand(3).getImm() != ARMCC::AL)
652 return false;
653 const MCInstrDesc &MCID = MI->getDesc();
654 if (MCID.hasOptionalDef() &&
655 MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
656 return false;
657
659 BuildMI(MBB, MI, MI->getDebugLoc(),
660 TII->get(ARM::tADDrSPi))
661 .add(MI->getOperand(0))
662 .add(MI->getOperand(1))
663 .addImm(Imm / 4) // The tADDrSPi has an implied scale by four.
665
666 // Transfer MI flags.
667 MIB.setMIFlags(MI->getFlags());
668
669 LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
670 << " to 16-bit: " << *MIB);
671
673 ++NumNarrows;
674 return true;
675 }
676
677 if (Entry.LowRegs1 && !VerifyLowRegs(MI))
678 return false;
679
680 if (MI->mayLoadOrStore())
681 return ReduceLoadStore(MBB, MI, Entry);
682
683 switch (Opc) {
684 default: break;
685 case ARM::t2ADDSri:
686 case ARM::t2ADDSrr: {
687 Register PredReg;
 // Only unpredicated flag-setting adds are narrowed.
688 if (getInstrPredicate(*MI, PredReg) == ARMCC::AL) {
689 switch (Opc) {
690 default: break;
691 case ARM::t2ADDSri:
692 if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
693 return true;
694 [[fallthrough]];
695 case ARM::t2ADDSrr:
696 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
697 }
698 }
699 break;
700 }
701 case ARM::t2RSBri:
702 case ARM::t2RSBSri:
703 case ARM::t2SXTB:
704 case ARM::t2SXTH:
705 case ARM::t2UXTB:
706 case ARM::t2UXTH:
 // Reducible only when immediate operand 2 is zero; ReduceToNarrow then
 // drops that operand.
707 if (MI->getOperand(2).getImm() == 0)
708 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
709 break;
710 case ARM::t2MOVi16:
711 // Can convert only 'pure' immediate operands, not immediates obtained as
712 // globals' addresses.
713 if (MI->getOperand(1).isImm())
714 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
715 break;
716 case ARM::t2CMPrr: {
717 // Try to reduce to the lo-reg only version first. Why there are two
718 // versions of the instruction is a mystery.
719 // It would be nice to just have two entries in the main table that
720 // are prioritized, but the table assumes a unique entry for each
721 // source insn opcode. So for now, we hack a local entry record to use.
722 static const ReduceEntry NarrowEntry =
723 { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 };
724 if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop))
725 return true;
726 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
727 }
728 case ARM::t2TEQrr: {
729 Register PredReg;
730 // Can only convert to eors if we're not in an IT block.
731 if (getInstrPredicate(*MI, PredReg) != ARMCC::AL)
732 break;
733 // TODO if Operand 0 is not killed but Operand 1 is, then we could write
734 // to Op1 instead.
 // The narrow EOR overwrites operand 0, so it must be killed here.
735 if (MI->getOperand(0).isKill())
736 return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
737 }
738 }
739 return false;
740}
741
/// ReduceTo2Addr - Try to replace \p MI with the two-address narrow opcode
/// Entry.NarrowOpc2, commuting the source operands when that makes the
/// destination equal to the tied source.
///
/// Returns true and increments Num2Addrs on success; returns false when the
/// -t2-reduce-limit2 cap is reached or a register / immediate / predicate /
/// CPSR constraint is not met. Note that a successful commute is not undone
/// if a later check fails.
/// NOTE(review): one line just before the counter increment is absent from
/// this listing - confirm against the upstream file.
742bool
743Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
744 const ReduceEntry &Entry,
745 bool LiveCPSR, bool IsSelfLoop) {
746 if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
747 return false;
748
749 if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
750 // Don't issue movs with shifter operand for some CPUs unless we
751 // are optimizing for size.
752 return false;
753
754 Register Reg0 = MI->getOperand(0).getReg();
755 Register Reg1 = MI->getOperand(1).getReg();
756 // t2MUL is "special". The tied source operand is second, not first.
757 if (MI->getOpcode() == ARM::t2MUL) {
758 Register Reg2 = MI->getOperand(2).getReg();
759 // Early exit if the regs aren't all low regs.
760 if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
761 || !isARMLowRegister(Reg2))
762 return false;
763 if (Reg0 != Reg2) {
764 // If the other operand also isn't the same as the destination, we
765 // can't reduce.
766 if (Reg1 != Reg0)
767 return false;
768 // Try to commute the operands to make it a 2-address instruction.
769 MachineInstr *CommutedMI = TII->commuteInstruction(*MI);
770 if (!CommutedMI)
771 return false;
772 }
773 } else if (Reg0 != Reg1) {
774 // Try to commute the operands to make it a 2-address instruction.
775 unsigned CommOpIdx1 = 1;
776 unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
 // Operand 1 must be commutable with an operand that holds Reg0.
777 if (!TII->findCommutedOpIndices(*MI, CommOpIdx1, CommOpIdx2) ||
778 MI->getOperand(CommOpIdx2).getReg() != Reg0)
779 return false;
780 MachineInstr *CommutedMI =
781 TII->commuteInstruction(*MI, false, CommOpIdx1, CommOpIdx2);
782 if (!CommutedMI)
783 return false;
784 }
785 if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
786 return false;
 // A non-zero Imm2Limit means operand 2 is an immediate; otherwise it is
 // read as a register.
787 if (Entry.Imm2Limit) {
788 unsigned Imm = MI->getOperand(2).getImm();
789 unsigned Limit = (1 << Entry.Imm2Limit) - 1;
790 if (Imm > Limit)
791 return false;
792 } else {
793 Register Reg2 = MI->getOperand(2).getReg();
794 if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
795 return false;
796 }
797
798 // Check if it's possible / necessary to transfer the predicate.
799 const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
800 Register PredReg;
801 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
802 bool SkipPred = false;
803 if (Pred != ARMCC::AL) {
804 if (!NewMCID.isPredicable())
805 // Can't transfer predicate, fail.
806 return false;
807 } else {
 // Unpredicated source, non-predicable target: drop the predicate operands.
808 SkipPred = !NewMCID.isPredicable();
809 }
810
 // HasCC: the optional def (last described operand) is CPSR.
 // CCDead: that CPSR def is marked dead.
811 bool HasCC = false;
812 bool CCDead = false;
813 const MCInstrDesc &MCID = MI->getDesc();
814 if (MCID.hasOptionalDef()) {
815 unsigned NumOps = MCID.getNumOperands();
816 HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
817 if (HasCC && MI->getOperand(NumOps-1).isDead())
818 CCDead = true;
819 }
820 if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
821 return false;
822
823 // Avoid adding a false dependency on partial flag update by some 16-bit
824 // instructions which has the 's' bit set.
825 if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
826 canAddPseudoFlagDep(MI, IsSelfLoop))
827 return false;
828
829 // Add the 16-bit instruction.
830 DebugLoc dl = MI->getDebugLoc();
831 MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
832 MIB.add(MI->getOperand(0));
 // The CPSR operand is added right after the destination register.
833 if (NewMCID.hasOptionalDef())
834 MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
835
836 // Transfer the rest of operands.
837 unsigned NumOps = MCID.getNumOperands();
838 for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
 // The optional def was already emitted above.
839 if (i < NumOps && MCID.operands()[i].isOptionalDef())
840 continue;
841 if (SkipPred && MCID.operands()[i].isPredicate())
842 continue;
843 MIB.add(MI->getOperand(i));
844 }
845
846 // Transfer MI flags.
847 MIB.setMIFlags(MI->getFlags());
848
849 LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
850 << " to 16-bit: " << *MIB);
851
853 ++Num2Addrs;
854 return true;
855}
856
/// ReduceToNarrow - Try to replace \p MI with the non-two-address narrow
/// opcode Entry.NarrowOpc1.
///
/// Returns true and increments NumNarrows on success; returns false when the
/// -t2-reduce-limit cap is reached or a register / immediate / predicate /
/// CPSR constraint is not met.
/// NOTE(review): one line just before the counter increment is absent from
/// this listing - confirm against the upstream file.
857bool
858Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
859 const ReduceEntry &Entry,
860 bool LiveCPSR, bool IsSelfLoop) {
861 if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
862 return false;
863
864 if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
865 // Don't issue movs with shifter operand for some CPUs unless we
866 // are optimizing for size.
867 return false;
868
 // With Imm1Limit == 0 no immediate is rejected by the range check below.
869 unsigned Limit = ~0U;
870 if (Entry.Imm1Limit)
871 Limit = (1 << Entry.Imm1Limit) - 1;
872
 // Check every described, non-predicate operand: registers must be low
 // (when LowRegs1 is set) and immediates must fit in Limit.
873 const MCInstrDesc &MCID = MI->getDesc();
874 for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
875 if (MCID.operands()[i].isPredicate())
876 continue;
877 const MachineOperand &MO = MI->getOperand(i);
878 if (MO.isReg()) {
879 Register Reg = MO.getReg();
880 if (!Reg || Reg == ARM::CPSR)
881 continue;
882 if (Entry.LowRegs1 && !isARMLowRegister(Reg))
883 return false;
884 } else if (MO.isImm() && !MCID.operands()[i].isPredicate()) {
885 if (((unsigned)MO.getImm()) > Limit)
886 return false;
887 }
888 }
889
890 // Check if it's possible / necessary to transfer the predicate.
891 const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
892 Register PredReg;
893 ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
894 bool SkipPred = false;
895 if (Pred != ARMCC::AL) {
896 if (!NewMCID.isPredicable())
897 // Can't transfer predicate, fail.
898 return false;
899 } else {
 // Unpredicated source, non-predicable target: drop the predicate operands.
900 SkipPred = !NewMCID.isPredicable();
901 }
902
 // HasCC: the optional def (last described operand) is CPSR.
 // CCDead: that CPSR def is marked dead.
903 bool HasCC = false;
904 bool CCDead = false;
905 if (MCID.hasOptionalDef()) {
906 unsigned NumOps = MCID.getNumOperands();
907 HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
908 if (HasCC && MI->getOperand(NumOps-1).isDead())
909 CCDead = true;
910 }
911 if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
912 return false;
913
914 // Avoid adding a false dependency on partial flag update by some 16-bit
915 // instructions which has the 's' bit set.
916 if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
917 canAddPseudoFlagDep(MI, IsSelfLoop))
918 return false;
919
920 // Add the 16-bit instruction.
921 DebugLoc dl = MI->getDebugLoc();
922 MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
923
924 // TEQ is special in that it doesn't define a register but we're converting
925 // it into an EOR which does. So add the first operand as a def and then
926 // again as a use.
927 if (MCID.getOpcode() == ARM::t2TEQrr) {
928 MIB.add(MI->getOperand(0));
 // Turn the copied use into a dead def.
929 MIB->getOperand(0).setIsKill(false);
930 MIB->getOperand(0).setIsDef(true);
931 MIB->getOperand(0).setIsDead(true);
932
933 if (NewMCID.hasOptionalDef())
934 MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
935 MIB.add(MI->getOperand(0));
936 } else {
937 MIB.add(MI->getOperand(0));
938 if (NewMCID.hasOptionalDef())
939 MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
940 }
941
942 // Transfer the rest of operands.
943 unsigned NumOps = MCID.getNumOperands();
944 for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
 // The optional def was already emitted above.
945 if (i < NumOps && MCID.operands()[i].isOptionalDef())
946 continue;
947 if ((MCID.getOpcode() == ARM::t2RSBSri ||
948 MCID.getOpcode() == ARM::t2RSBri ||
949 MCID.getOpcode() == ARM::t2SXTB ||
950 MCID.getOpcode() == ARM::t2SXTH ||
951 MCID.getOpcode() == ARM::t2UXTB ||
952 MCID.getOpcode() == ARM::t2UXTH) && i == 2)
953 // Skip the zero immediate operand, it's now implicit.
954 continue;
955 bool isPred = (i < NumOps && MCID.operands()[i].isPredicate());
956 if (SkipPred && isPred)
957 continue;
958 const MachineOperand &MO = MI->getOperand(i);
959 if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
960 // Skip implicit def of CPSR. Either it's modeled as an optional
961 // def now or it's already an implicit def on the new instruction.
962 continue;
963 MIB.add(MO);
964 }
 // The wide opcode carried no predicate operands but the narrow one needs
 // them: add an "always" predicate.
965 if (!MCID.isPredicable() && NewMCID.isPredicable())
966 MIB.add(predOps(ARMCC::AL));
967
968 // Transfer MI flags.
969 MIB.setMIFlags(MI->getFlags());
970
971 LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
972 << " to 16-bit: " << *MIB);
973
975 ++NumNarrows;
976 return true;
977}
978
979static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
980 bool HasDef = false;
981 for (const MachineOperand &MO : MI.operands()) {
982 if (!MO.isReg() || MO.isUndef() || MO.isUse())
983 continue;
984 if (MO.getReg() != ARM::CPSR)
985 continue;
986
987 DefCPSR = true;
988 if (!MO.isDead())
989 HasDef = true;
990 }
991
992 return HasDef || LiveCPSR;
993}
994
995static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
996 for (const MachineOperand &MO : MI.operands()) {
997 if (!MO.isReg() || MO.isUndef() || MO.isDef())
998 continue;
999 if (MO.getReg() != ARM::CPSR)
1000 continue;
1001 assert(LiveCPSR && "CPSR liveness tracking is wrong!");
1002 if (MO.isKill()) {
1003 LiveCPSR = false;
1004 break;
1005 }
1006 }
1007
1008 return LiveCPSR;
1009}
1010
1011bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
1012 bool LiveCPSR, bool IsSelfLoop,
1013 bool SkipPrologueEpilogue) {
1014 unsigned Opcode = MI->getOpcode();
1015 DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
1016 if (OPI == ReduceOpcodeMap.end())
1017 return false;
1018 if (SkipPrologueEpilogue && (MI->getFlag(MachineInstr::FrameSetup) ||
1019 MI->getFlag(MachineInstr::FrameDestroy)))
1020 return false;
1021 const ReduceEntry &Entry = ReduceTable[OPI->second];
1022
1023 // Don't attempt normal reductions on "special" cases for now.
1024 if (Entry.Special)
1025 return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
1026
1027 // Try to transform to a 16-bit two-address instruction.
1028 if (Entry.NarrowOpc2 &&
1029 ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
1030 return true;
1031
1032 // Try to transform to a 16-bit non-two-address instruction.
1033 if (Entry.NarrowOpc1 &&
1034 ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
1035 return true;
1036
1037 return false;
1038}
1039
1040bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB,
1041 bool SkipPrologueEpilogue) {
1042 bool Modified = false;
1043
1044 // Yes, CPSR could be livein.
1045 bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
1046 MachineInstr *BundleMI = nullptr;
1047
1048 CPSRDef = nullptr;
1049 HighLatencyCPSR = false;
1050
1051 // Check predecessors for the latest CPSRDef.
1052 for (auto *Pred : MBB.predecessors()) {
1053 const MBBInfo &PInfo = BlockInfo[Pred->getNumber()];
1054 if (!PInfo.Visited) {
1055 // Since blocks are visited in RPO, this must be a back-edge.
1056 continue;
1057 }
1058 if (PInfo.HighLatencyCPSR) {
1059 HighLatencyCPSR = true;
1060 break;
1061 }
1062 }
1063
1064 // If this BB loops back to itself, conservatively avoid narrowing the
1065 // first instruction that does partial flag update.
1066 bool IsSelfLoop = MBB.isSuccessor(&MBB);
1069 for (; MII != E; MII = NextMII) {
1070 NextMII = std::next(MII);
1071
1072 MachineInstr *MI = &*MII;
1073 if (MI->isBundle()) {
1074 BundleMI = MI;
1075 continue;
1076 }
1077 if (MI->isDebugInstr())
1078 continue;
1079
1080 LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
1081
1082 // Does NextMII belong to the same bundle as MI?
1083 bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();
1084
1085 if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop, SkipPrologueEpilogue)) {
1086 Modified = true;
1087 MachineBasicBlock::instr_iterator I = std::prev(NextMII);
1088 MI = &*I;
1089 // Removing and reinserting the first instruction in a bundle will break
1090 // up the bundle. Fix the bundling if it was broken.
1091 if (NextInSameBundle && !NextMII->isBundledWithPred())
1092 NextMII->bundleWithPred();
1093 }
1094
1095 if (BundleMI && !NextInSameBundle && MI->isInsideBundle()) {
1096 // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
1097 // marker is only on the BUNDLE instruction. Process the BUNDLE
1098 // instruction as we finish with the bundled instruction to work around
1099 // the inconsistency.
1100 if (BundleMI->killsRegister(ARM::CPSR, /*TRI=*/nullptr))
1101 LiveCPSR = false;
1102 MachineOperand *MO =
1103 BundleMI->findRegisterDefOperand(ARM::CPSR, /*TRI=*/nullptr);
1104 if (MO && !MO->isDead())
1105 LiveCPSR = true;
1106 MO = BundleMI->findRegisterUseOperand(ARM::CPSR, /*TRI=*/nullptr);
1107 if (MO && !MO->isKill())
1108 LiveCPSR = true;
1109 }
1110
1111 bool DefCPSR = false;
1112 LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
1113 if (MI->isCall()) {
1114 // Calls don't really set CPSR.
1115 CPSRDef = nullptr;
1116 HighLatencyCPSR = false;
1117 IsSelfLoop = false;
1118 } else if (DefCPSR) {
1119 // This is the last CPSR defining instruction.
1120 CPSRDef = MI;
1121 HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
1122 IsSelfLoop = false;
1123 }
1124 }
1125
1126 MBBInfo &Info = BlockInfo[MBB.getNumber()];
1127 Info.HighLatencyCPSR = HighLatencyCPSR;
1128 Info.Visited = true;
1129 return Modified;
1130}
1131
1132bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
1133 if (PredicateFtor && !PredicateFtor(MF.getFunction()))
1134 return false;
1135
1136 STI = &MF.getSubtarget<ARMSubtarget>();
1137 if (STI->isThumb1Only() || STI->prefers32BitThumb())
1138 return false;
1139
1140 TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());
1141
1142 // Optimizing / minimizing size? Minimizing size implies optimizing for size.
1143 OptimizeSize = MF.getFunction().hasOptSize();
1144 MinimizeSize = STI->hasMinSize();
1145
1146 BlockInfo.clear();
1147 BlockInfo.resize(MF.getNumBlockIDs());
1148
1149 // Visit blocks in reverse post-order so LastCPSRDef is known for all
1150 // predecessors.
1152 bool Modified = false;
1153 bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
1155 for (MachineBasicBlock *MBB : RPOT)
1156 Modified |= ReduceMBB(*MBB, /*SkipPrologueEpilogue=*/NeedsWinCFI);
1157 return Modified;
1158}
1159
1160/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
1161/// reduction pass.
1163 std::function<bool(const Function &)> Ftor) {
1164 return new Thumb2SizeReduce(std::move(Ftor));
1165}
aarch64 promote const
MachineBasicBlock & MBB
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Performs the initial survey of the specified function
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static cl::opt< int > ReduceLimit("t2-reduce-limit", cl::init(-1), cl::Hidden)
static cl::opt< int > ReduceLimitLdSt("t2-reduce-limit3", cl::init(-1), cl::Hidden)
static cl::opt< int > ReduceLimit2Addr("t2-reduce-limit2", cl::init(-1), cl::Hidden)
static bool HasImplicitCPSRDef(const MCInstrDesc &MCID)
static bool isHighLatencyCPSR(MachineInstr *Def)
static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR)
static bool VerifyLowRegs(MachineInstr *MI)
#define THUMB2_SIZE_REDUCE_NAME
static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR)
#define DEBUG_TYPE
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:196
bool isThumb1Only() const
Definition: ARMSubtarget.h:364
bool hasMinSize() const
Definition: ARMSubtarget.h:363
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:705
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition: Function.h:680
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:793
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
bool hasOptionalDef() const
Set if this instruction has an optional definition, e.g.
Definition: MCInstrDesc.h:265
ArrayRef< MCPhysReg > implicit_defs() const
Return a list of registers that are potentially written by any instance of this machine instruction.
Definition: MCInstrDesc.h:579
bool isPredicable() const
Return true if this instruction has a predicate operand that controls execution.
Definition: MCInstrDesc.h:338
unsigned getOpcode() const
Return the opcode number for this descriptor.
Definition: MCInstrDesc.h:230
instr_iterator instr_begin()
instr_iterator erase_instr(MachineInstr *I)
Remove an instruction from the instruction list and delete it.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
Instructions::iterator instr_iterator
instr_iterator instr_end()
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
MachineOperand * findRegisterUseOperand(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
static const unsigned CommuteAnyOperandIndex
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition: COFF.h:826
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Dead
Unused definition.
@ Define
Register definition.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:47
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
static bool isARMLowRegister(unsigned Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
Definition: ARMBaseInfo.h:160
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
unsigned getInternalReadRegState(bool B)
unsigned getKillRegState(bool B)
ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, Register &PredReg)
getInstrPredicate - If instruction is predicated, returns its predicate condition,...
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1849
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
FunctionPass * createThumb2SizeReductionPass(std::function< bool(const Function &)> Ftor=nullptr)
createThumb2SizeReductionPass - Returns an instance of the Thumb2 size reduction pass.
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39