LLVM 23.0.0git
HexagonPostRAHandleQFP.cpp
Go to the documentation of this file.
1//===--------------------- HexagonPostRAHandleQFP.cpp --------------------------
2//===//
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===---------------------------------------------------------------------===//
9// For v79 and above, we generate qf operations for HVX, which include vadd,
10// vsub and vmpy instructions. These qf operations with qf operands are fast,
11// maintain accuracy similar to IEEE, and save power.
12//
13// However, these qf operands should always be converted back to IEEE format
14// when used in non-HVX instructions. This is because of how the qf values
15// are stored in memory. qf operands have 4 extra bits. If used in non-HVX
16// operations, these bits get dropped resulting in incorrect value being
17// used. So, before use in any non-HVX operation we need to convert these
18// qf values to IEEE format.
19//
20// During register allocation, when no more physical registers are available
21// the qf operands may be spilled to memory. This instantly causes loss of
22// accuracy. This pass prevents that by:
23// 1. Inserting qf type to IEEE type conversion instructions before the spill.
24// 2. Iterating over the uses of qf def (created before the spill) and
25// changing their opcodes to handle IEEE type operands for saturating
26// instructions. This is because, the refills will use IEEE type operands, but
27// the instructions will still assume qf operands. For non-saturating
28// instructions which uses qf, we incorporate a conversion to IEEE before that.
29// 3. Iterating over the uses of qf def created by the spill and replacing
30// them with the appropriate opcode (which uses IEEE operands) for saturating
31// instructions. For non-saturating instructions which use qf,
32// we incorporate a conversion to IEEE before that.
33// 4. Iterating over the copy instructions and checking their uses,
34// inserting conversions from qf to IEEE whenever required. The conversions
35// are inserted after their reaching def since there can be multiple defs
36// for use in non-SSA form.
37//
38// To get the use-def chains, we make use of Register DataFlow Graph (RDF),
39// since after register allocation SSA form is lost. This can be done during
40// spills and fills during Frame Lowering for register allocation. However,
41// that was abandoned due to the intermediate state of the code.
42// Liveness is preserved in this pass.
43//
44// NOTE:
45// Saturating instructions: Instructions for which transformation involves
46// only changing the opcode. Eg. vmpy(qf32, sf) saturates to vmpy(sf, sf) when
47// we see that the first operand is now a sf type.
48// Non-Saturating instructions: Instructions for which conversion(s) have
49// to be inserted. Eg. Vd.f8=Vu.qf16. If the use operand is now hf type,
50// we have to insert a conversion qf16 = hf before this instruction.
51//
52// FIXME tags have been added for potential errors, along with the underlying
53// assumption.
54// FIXME Implement v81 specific optimizations as below. At the moment, we add
55// converts.
56// Vd.qf16=Vu.hf
57// Vd.qf16=Vu.qf16
58// Vd.qf32=Vu.qf32
59// Vd.qf32=Vu.sf
60//===---------------------------------------------------------------------===//
61
71#include "llvm/CodeGen/Passes.h"
79#include "llvm/Support/Debug.h"
82
83#define DEBUG_TYPE "handle-qfp"
84
85using namespace llvm;
86using namespace rdf;
87
89
91 "disable-handle-qfp", cl::init(false),
92 cl::desc("Disable handling of Qfloat spills/refills after register "
93 "allocation."));
94
95// This static function gets all reached uses of a def.
96// When it encounters a phi node, it goes over the
97// reached uses of the phi node too.
98static void getAllRealUses(NodeAddr<DefNode *> DA, NodeSet &UNodeSet,
100 bool comprehensive = false) {
101 RegisterRef DR = DA.Addr->getRegRef(*G);
102 NodeAddr<StmtNode *> DefStmt = DA.Addr->getOwner(*G);
103 MachineInstr *Instr = DefStmt.Addr->getCode();
104 auto UseSet = L->getAllReachedUses(DR, DA);
105
106 for (auto UI : UseSet) {
107 NodeAddr<UseNode *> UA = G->addr<UseNode *>(UI);
108
109 /*LLVM_DEBUG(
110 NodeAddr<StmtNode *> UseStmt = UA.Addr->getOwner(*G);
111 MachineInstr* UseInstr = UseStmt.Addr->getCode();
112 if (UseInstr != nullptr)
113 {dbgs() << "\t\t[Reached Use]: "; UseInstr->dump();}
114 );*/
115
116 MachineFunction *MF = Instr->getMF();
117 const auto &HRI = MF->getSubtarget<HexagonSubtarget>().getRegisterInfo();
118 Register RR = UA.Addr->getRegRef(*G).Id;
119 if (HRI->isFakeReg(RR))
120 continue;
121
122 if (UA.Addr->getFlags() & NodeAttrs::PhiRef) {
123 NodeAddr<PhiNode *> PA = UA.Addr->getOwner(*G);
124 NodeId id = PA.Id;
125 const Liveness::RefMap &phiUse = L->getRealUses(id);
126 for (auto I : phiUse) {
127 if (!G->getPRI().alias(RegisterRef(I.first), DR))
128 continue;
129 auto phiUseSet = I.second;
130 for (auto phiUI : phiUseSet) {
131 NodeAddr<UseNode *> phiUA = G->addr<UseNode *>(phiUI.first);
132 UNodeSet.insert(phiUA.Id);
133 }
134 }
135 } else {
136 // FIXME Due to bug in RDF, check if the reaching def of the use
137 // reaches this instruction
138 if (comprehensive) {
139 UNodeSet.insert(UA.Id);
140 continue;
141 }
142 NodeAddr<StmtNode *> UseStmt = UA.Addr->getOwner(*G);
143 for (NodeAddr<UseNode *> UA : UseStmt.Addr->members_if(G->IsUse, *G)) {
144 NodeId QFPDefNode = UA.Addr->getReachingDef();
145 NodeAddr<DefNode *> RegDef = G->addr<DefNode *>(QFPDefNode);
146 // FIXME Reaching def computation error
147 if (QFPDefNode == 0)
148 continue;
149 NodeAddr<StmtNode *> RegStmt = RegDef.Addr->getOwner(*G);
150 MachineInstr *ReachDefInstr = RegStmt.Addr->getCode();
151 if (ReachDefInstr && ReachDefInstr == Instr)
152 UNodeSet.insert(UA.Id);
153 }
154 }
155 }
156}
157
158namespace llvm {
161} // namespace llvm
162
163// List of QF instructions which need to be analyzed.
164// The value of each key is a pair describing the two source operands:
165// pair.first / pair.second is true if that operand is IEEE type, false if qf.
166// For these instructions we only need to change the opcode to handle
167// qf/sf misuses, i.e. these instructions can be 'saturated'.
169 {Hexagon::V6_vadd_qf16_mix, {false, true}},
170 {Hexagon::V6_vadd_qf16, {false, false}},
171 {Hexagon::V6_vadd_qf32_mix, {false, true}},
172 {Hexagon::V6_vadd_qf32, {false, false}},
173 {Hexagon::V6_vsub_qf16_mix, {false, true}},
174 {Hexagon::V6_vsub_hf_mix, {true, false}},
175 {Hexagon::V6_vsub_qf16, {false, false}},
176 {Hexagon::V6_vsub_qf32_mix, {false, true}},
177 {Hexagon::V6_vsub_sf_mix, {true, false}},
178 {Hexagon::V6_vsub_qf32, {false, false}},
179 {Hexagon::V6_vmpy_qf16_mix_hf, {false, true}},
180 {Hexagon::V6_vmpy_qf16, {false, false}},
181 {Hexagon::V6_vmpy_qf32_mix_hf, {false, true}},
182 {Hexagon::V6_vmpy_qf32_qf16, {false, false}},
183 {Hexagon::V6_vmpy_qf32, {false, false}},
184 {Hexagon::V6_vmpy_rt_qf16, {false, true}},
185 // These opcodes take a single operand only.
186 // Second placeholder op is true always.
187 {Hexagon::V6_vabs_qf32_qf32, {false, true}},
188 {Hexagon::V6_vabs_qf16_qf16, {false, true}},
189 {Hexagon::V6_vneg_qf32_qf32, {false, true}},
190 {Hexagon::V6_vneg_qf16_qf16, {false, true}},
191 {Hexagon::V6_vilog2_qf32, {false, true}},
192 {Hexagon::V6_vilog2_qf16, {false, true}},
193 {Hexagon::V6_vconv_qf32_qf32, {false, true}},
194 {Hexagon::V6_vconv_qf16_qf16, {false, true}},
195};
196
197// This holds the instruction opcodes for which there are
198// no 'saturating' opcodes. The only way is to insert
199// convert instructions before them.
201 Hexagon::V6_vconv_hf_qf16, Hexagon::V6_vconv_hf_qf32,
202 Hexagon::V6_vconv_sf_qf32,
203 // v81 instructions
204 Hexagon::V6_vconv_bf_qf32, Hexagon::V6_vconv_f8_qf16};
205
206namespace {
207class HexagonPostRAHandleQFP : public MachineFunctionPass {
208public:
209 static char ID;
210 HexagonPostRAHandleQFP() : MachineFunctionPass(ID) {
211 PassRegistry &R = *PassRegistry::getPassRegistry();
213 }
214 StringRef getPassName() const override {
215 return "Hexagon handle QFloat spills and refills post RA.";
216 }
217 void getAnalysisUsage(AnalysisUsage &AU) const override {
219 AU.addRequired<MachineDominatorTreeWrapperPass>();
220 AU.addRequired<MachineDominanceFrontierWrapperPass>();
221 AU.setPreservesCFG();
222 }
223 bool runOnMachineFunction(MachineFunction &MF) override;
224
225private:
226 // QFUses collects the instructions which uses QF operands.
227 // These have to be deleted and transformed to opcodes
228 // to denote usage of IEEE operands.
229 // It might involve changing the order of the Register operands.
230 using QFUses = std::map<MachineInstr *, std::pair<bool, bool>>;
231 QFUses QFUsesMap;
232
233 // Holds the Register Dataflow Graph.
234 DataFlowGraph *DFG = nullptr;
235
236 // Stores spill nodes and their reaching definition instructions
237 // which generates the qf operand to be stored.
238 std::vector<std::pair<MachineInstr *, NodeAddr<DefNode *>>> SpillMIs;
239 // Stores the refill nodes consisting of load instructions.
240 std::vector<NodeAddr<DefNode *>> RefillMIs;
241
242 // Stores the type of op.
243 enum ConvOperand {
244 Undefined = 0x0,
245 Lo = 0x1,
246 Hi = 0x2,
247 HiLo = 0x3,
248 };
249 // Stores the convert instructions which take qf operands.
250 MapVector<MachineInstr *, unsigned> QFNonSatMIs;
251
252 // Stores the qf-generating vmul/vadd/etc. nodes with multiple reaching defs
253 std::set<NodeId> PossibleMultiReachDefs;
254 // Qf generating instructions to ignore. Do not insert conversion instruction
255 // to sf/hf from qf, if the instr is present in this list; since that means
256 // a conversion has already been inserted after the instruction.
257 SmallPtrSet<MachineInstr *, 4> IgnoreInsertConvList;
258
259 // Register type
260 enum class RegType { qf32, qf16, qf32_double, qf16_double, ieee, undefined };
261 // Stores the copy instructions with their reaching def, along with the op
262 // type
263 std::map<std::pair<NodeId, NodeId>, RegType> QFCopys;
264
265 // Stores the reaching defs of copies whose result has to be converted to IEEE
266 DenseMap<MachineInstr *, RegType> ReachDefOfCopies;
267
268 // Stores copies which need to be converted back to qf. The uses of these
269 // copies feed to qf type instructions and hence can be converted back to qf
270 // type.
271 DenseMap<MachineInstr *, std::pair<NodeAddr<DefNode *>, RegType>>
272 ConvertToQfCopies;
273
274 // Subregister kill set for a doubletype use. The pair of bool,bool
275 // represents the hi and lo subregisters of the double register.
276 DenseMap<MachineInstr *, std::pair<bool, bool>> SubRegKillSet;
277
278 const HexagonInstrInfo *HII = nullptr;
279 const HexagonRegisterInfo *HRI = nullptr;
280 MachineRegisterInfo *MRI = nullptr;
281 Liveness *LV = nullptr;
282 const HexagonSubtarget *HST = nullptr;
283
284 void collectQFPStackSpill(NodeAddr<StmtNode *> *);
285 void collectQFPStackRefill(NodeAddr<StmtNode *> *);
286 void collectCopies(NodeAddr<StmtNode *> *);
287 bool HandleRefills();
288 bool HandleSpills();
289 bool HandleCopies();
290 bool HandleNonSatInstr();
291 bool HandleMultiReachingDefs();
292 bool HandleReachDefOfCopies();
293 bool HandleConvertToQfCopies();
294 RegType HasQfUses(NodeAddr<DefNode *>, MachineInstr *);
295 void collectConvQFInstr(NodeAddr<DefNode *> &);
296 void collectQFUses(NodeAddr<DefNode *>, MachineInstr *DefMI);
297 void conditionallyInsert(MachineInstr &, Register &);
298
299 // Helper functions
300 unsigned short getreplacedQFOpcode(unsigned, bool, bool);
301 MCPhysReg findAllocatableReg(MachineInstr *MI) const;
302 void insertIEEEToQF(MachineInstr *, Register, MachineOperand, bool is32bit);
303 void collectLivenessForSubregs(NodeAddr<UseNode *> &);
304 void insertInstr(MachineInstr *, unsigned, unsigned, unsigned, RegState);
305};
306} // namespace
307
308// This class handles spurious vector instructions which do not
309// follow the ABI. For example, vcombine(qf,qf) takes qf operands
310// instead of IEEE type. This diagnostic pass can be used
311// as a final verifier for the XQF implementation. Turned off by
312// default.
// Storage for the pass's static ID member, which the constructor hands to the
// MachineFunctionPass base class.
char HexagonPostRAHandleQFP::ID = 0;

namespace llvm {
// Reference to the pass ID, declared in namespace llvm so it can be named
// outside the anonymous namespace that holds the pass class.
char &HexagonPostRAHandleQFPID = HexagonPostRAHandleQFP::ID;
}
318
319// Check whether the instruction is added already, if not add it
320// along with the Register values and qf type.
321// If already added, then check the register values and edit them.
322void HexagonPostRAHandleQFP::conditionallyInsert(MachineInstr &MI,
323 Register &DefReg) {
324 LLVM_DEBUG(dbgs() << "\nCollecting instruction using QF: "; MI.dump());
325 // check if the key exists.
326 Register Reg1 = MI.getOperand(1).getReg();
327
328 // If the use is a unary operation, make second register point to Defreg
329 // This ensures that secondOp is always true
330 Register Reg2 = MI.getNumOperands() == 2 ? DefReg : MI.getOperand(2).getReg();
331
332 if (QFUsesMap.find(&MI) != QFUsesMap.end()) {
333 auto Entry = QFUsesMap[&MI];
334 bool firstOp = ((Reg1 == DefReg) ? true : false) | Entry.first;
335 bool secondOp = ((Reg2 == DefReg) ? true : false) | Entry.second;
336 QFUsesMap[&MI] = std::make_pair(firstOp, secondOp);
337
338 } else { // encountered first time.
339 // Get the default type of the operand:
340 // True : IEEE type
341 // False : QF type
342 auto defaultPair = QFPSatInstsMap[MI.getOpcode()];
343 bool firstOp = (Reg1 == DefReg) ? true : defaultPair.first;
344 bool secondOp = (Reg2 == DefReg) ? true : defaultPair.second;
345 QFUsesMap[&MI] = std::make_pair(firstOp, secondOp);
346 }
347}
348
349unsigned short HexagonPostRAHandleQFP::getreplacedQFOpcode(unsigned srcOpcode,
350 bool firstOp,
351 bool secondOp) {
352 if (firstOp && secondOp) {
353 switch (srcOpcode) {
354 case Hexagon::V6_vadd_qf32:
355 case Hexagon::V6_vadd_qf32_mix:
356 return Hexagon::V6_vadd_sf;
357 case Hexagon::V6_vadd_qf16:
358 case Hexagon::V6_vadd_qf16_mix:
359 return Hexagon::V6_vadd_hf;
360
361 case Hexagon::V6_vsub_qf32:
362 case Hexagon::V6_vsub_qf32_mix:
363 case Hexagon::V6_vsub_sf_mix:
364 return Hexagon::V6_vsub_sf;
365 case Hexagon::V6_vsub_qf16:
366 case Hexagon::V6_vsub_qf16_mix:
367 case Hexagon::V6_vsub_hf_mix:
368 return Hexagon::V6_vsub_hf;
369
370 case Hexagon::V6_vmpy_qf32:
371 return Hexagon::V6_vmpy_qf32_sf;
372 case Hexagon::V6_vmpy_qf16:
373 case Hexagon::V6_vmpy_qf16_mix_hf:
374 return Hexagon::V6_vmpy_qf16_hf;
375 case Hexagon::V6_vmpy_qf32_qf16:
376 case Hexagon::V6_vmpy_qf32_mix_hf:
377 return Hexagon::V6_vmpy_qf32_hf;
378
379 case Hexagon::V6_vmpy_rt_qf16:
380 return Hexagon::V6_vmpy_rt_hf;
381 // v81 opcodes start
382 case Hexagon::V6_vabs_qf32_qf32:
383 return Hexagon::V6_vabs_qf32_sf;
384 case Hexagon::V6_vabs_qf16_qf16:
385 return Hexagon::V6_vabs_qf16_hf;
386 case Hexagon::V6_vneg_qf32_qf32:
387 return Hexagon::V6_vneg_qf32_sf;
388 case Hexagon::V6_vneg_qf16_qf16:
389 return Hexagon::V6_vneg_qf16_hf;
390 case Hexagon::V6_vilog2_qf32:
391 return Hexagon::V6_vilog2_sf;
392 case Hexagon::V6_vilog2_qf16:
393 return Hexagon::V6_vilog2_hf;
394 case Hexagon::V6_vconv_qf32_qf32:
395 return Hexagon::V6_vconv_qf32_sf;
396 case Hexagon::V6_vconv_qf16_qf16:
397 return Hexagon::V6_vconv_qf16_hf;
398 // v81 opcodes end
399
400 default:
401 llvm_unreachable("Invalid qf opcode in this scenario!");
402 }
403 } else if (firstOp) {
404 switch (srcOpcode) {
405 case Hexagon::V6_vadd_qf32:
406 return Hexagon::V6_vadd_qf32_mix; // interchange reqd
407 case Hexagon::V6_vadd_qf16:
408 return Hexagon::V6_vadd_qf16_mix; // interchange reqd
409
410 case Hexagon::V6_vsub_qf32:
411 if (HST->useHVXV81Ops())
412 return Hexagon::V6_vsub_sf_mix;
413 else if (HST->useHVXV79Ops())
414 return Hexagon::V6_vsub_sf; // conv reqd
415 else
416 llvm_unreachable("Invalid Hexagon Arch for this scenario!");
417 case Hexagon::V6_vsub_qf16:
418 if (HST->useHVXV81Ops())
419 return Hexagon::V6_vsub_hf_mix;
420 else if (HST->useHVXV79Ops())
421 return Hexagon::V6_vsub_hf; // conv reqd
422 else
423 llvm_unreachable("Invalid Hexagon Arch for this scenario!");
424 case Hexagon::V6_vsub_qf32_mix:
425 return Hexagon::V6_vsub_sf;
426 case Hexagon::V6_vsub_qf16_mix:
427 return Hexagon::V6_vsub_hf;
428
429 // This opcode does not have a mixed type. Hence if one
430 // of op1 or op2 is IEEE type and another qf type,
431 // send the opcode which takes in both as IEEE type.
432 case Hexagon::V6_vmpy_qf32:
433 return Hexagon::V6_vmpy_qf32_sf; // conv reqd
434 case Hexagon::V6_vmpy_qf16:
435 return Hexagon::V6_vmpy_qf16_mix_hf; // interchange reqd
436 case Hexagon::V6_vmpy_qf32_qf16:
437 return Hexagon::V6_vmpy_qf32_mix_hf; // interchange reqd
438
439 default:
440 return srcOpcode;
441 }
442 } else if (secondOp) {
443 switch (srcOpcode) {
444 case Hexagon::V6_vadd_qf32:
445 return Hexagon::V6_vadd_qf32_mix;
446 case Hexagon::V6_vadd_qf16:
447 return Hexagon::V6_vadd_qf16_mix;
448
449 case Hexagon::V6_vsub_qf32:
450 return Hexagon::V6_vsub_qf32_mix;
451 case Hexagon::V6_vsub_qf16:
452 return Hexagon::V6_vsub_qf16_mix;
453 case Hexagon::V6_vsub_sf_mix:
454 return Hexagon::V6_vsub_sf;
455 case Hexagon::V6_vsub_hf_mix:
456 return Hexagon::V6_vsub_hf;
457
458 case Hexagon::V6_vmpy_qf32:
459 return Hexagon::V6_vmpy_qf32_sf; // conv reqd
460
461 case Hexagon::V6_vmpy_qf16:
462 return Hexagon::V6_vmpy_qf16_mix_hf;
463 case Hexagon::V6_vmpy_qf32_qf16:
464 return Hexagon::V6_vmpy_qf32_mix_hf;
465
466 default:
467 return srcOpcode;
468 }
469 } else
470 return srcOpcode;
471}
472
473// Insert IEEE to Qf conversion instructions
474// is32bit: If true, SrcReg holds sf type, else a hf type
475void HexagonPostRAHandleQFP::insertIEEEToQF(MachineInstr *MI, Register SrcReg,
476 MachineOperand SrcOp,
477 bool is32bit = false) {
478
479 auto MBB = MI->getParent();
480 MachineInstrBuilder MIB;
481 const DebugLoc &DL = MI->getDebugLoc();
482
483 if (HST->useHVXV81Ops()) {
484 auto Op = is32bit ? Hexagon::V6_vconv_qf32_sf : Hexagon::V6_vconv_qf16_hf;
485 MIB = BuildMI(*MBB, *MI, DL, HII->get(Op), SrcReg)
486 .addReg(SrcReg, RegState::Renamable | RegState::Kill);
487 LLVM_DEBUG(dbgs() << "\nInserting new instruction: ";
488 MIB.getInstr()->dump());
489
490 } else if (HST->useHVXV79Ops()) {
491 // Get an available register
492 auto V0_Reg = findAllocatableReg(MI);
493
494 MIB = BuildMI(*MBB, *MI, DL, HII->get(Hexagon::V6_vd0), V0_Reg);
495 LLVM_DEBUG(dbgs() << "\nInserting new instruction: ";
496 MIB.getInstr()->dump());
497 auto Op = is32bit ? Hexagon::V6_vadd_sf : Hexagon::V6_vadd_hf;
498 MIB = BuildMI(*MBB, *MI, DL, HII->get(Op), SrcReg)
499 .addReg(SrcReg, RegState::Renamable | RegState::Kill)
500 .addReg(V0_Reg, RegState::Kill);
501 LLVM_DEBUG(dbgs() << "Inserting new instruction: "; MIB.getInstr()->dump());
502 } else
503 llvm_unreachable("Not possible to insert qf = hf/sf for this unknown\
504 subtarget!");
505}
506
507// Create a new instruction which handle sf/hf types to replace
508// qf type handling instructions.
509bool HexagonPostRAHandleQFP::HandleRefills() {
510
511 bool Changed = false;
512 LLVM_DEBUG(dbgs() << "HandleRefills: ");
513 std::vector<MachineInstr *> eraseList;
514
515 for (auto It : QFUsesMap) {
516
517 // Separately handle unary qf opcodes
518 MachineInstr *MI = It.first;
519 auto SrcOpcode = MI->getOpcode();
520 auto Pair = It.second;
521 auto SrcOp1 = MI->getOperand(1);
522 Register DestReg = MI->getOperand(0).getReg();
523 auto MBB = MI->getParent();
524 MachineInstrBuilder MIB;
525 LLVM_DEBUG(dbgs() << "\nProcessing: "; MI->dump());
526 const DebugLoc &DL = MI->getDebugLoc();
527
528 // lambda to handle unary qf operations
529 // ieee: True if the 1st operand is sf/hf type, false if qf type
530 auto HandleUnaryRefill = [&](MachineInstr *MI, bool isIeee) -> bool {
531 if (isIeee) {
532 auto finalOpcode = getreplacedQFOpcode(SrcOpcode, true, true);
533 MIB = BuildMI(*MBB, *MI, DL, HII->get(finalOpcode), DestReg)
534 .addReg(SrcOp1.getReg(), getRegState(SrcOp1));
535 Changed |= true;
536 LLVM_DEBUG(dbgs() << "\nInserting new instruction: ";
537 MIB.getInstr()->dump());
538 } else
539 eraseList.push_back(MI);
540 return Changed;
541 };
542
543 if (MI->getNumOperands() == 2) {
544 Changed |= HandleUnaryRefill(It.first, It.second.first);
545 continue;
546 }
547 auto SrcOp2 = MI->getOperand(2);
548
549 // lambda to handle mixed type vsub instructions for v79
550 auto HandleSub = [&](auto srcOpcode) -> bool {
551 auto ConvOp = (srcOpcode == Hexagon::V6_vsub_qf32)
552 ? Hexagon::V6_vconv_sf_qf32
553 : Hexagon::V6_vconv_hf_qf16;
554 auto SubOp = (ConvOp == Hexagon::V6_vconv_sf_qf32) ? Hexagon::V6_vsub_sf
555 : Hexagon::V6_vsub_hf;
556
557 Register SrcOp2Reg = SrcOp2.getReg();
558 MIB = BuildMI(*MBB, *MI, DL, HII->get(ConvOp), SrcOp2Reg)
559 .addReg(SrcOp2Reg, getRegState(SrcOp2) | RegState::Kill);
560 LLVM_DEBUG(dbgs() << "\nInserting new instruction: ";
561 MIB.getInstr()->dump());
562 MIB = BuildMI(*MBB, *MI, DL, HII->get(SubOp), DestReg)
563 .addReg(SrcOp1.getReg(), getRegState(SrcOp1))
564 .addReg(SrcOp2Reg, getRegState(SrcOp2));
565 // If Op2 is not killed, it is used after this instruction.
566 // convert it back to original qf form.
567 if (!SrcOp2.isKill())
568 insertIEEEToQF(&*(++MI->getIterator()), SrcOp2.getReg(), SrcOp2);
569 return true;
570 };
571
572 // If both operands are sf type, we only need to replace the opcode.
573 if (Pair.first == true && Pair.second == true) {
574 auto finalOpcode = getreplacedQFOpcode(SrcOpcode, true, true);
575 MIB = BuildMI(*MBB, *MI, DL, HII->get(finalOpcode), DestReg)
576 .addReg(SrcOp1.getReg(), getRegState(SrcOp1))
577 .addReg(SrcOp2.getReg(), getRegState(SrcOp2));
578 Changed |= true;
579 LLVM_DEBUG(dbgs() << "\nInserting new instruction: ";
580 MIB.getInstr()->dump());
581
582 } else if (Pair.first == true && Pair.second == false) {
583 auto finalOpcode = getreplacedQFOpcode(SrcOpcode, true, false);
584
585 // If 2nd op is qf, first op is sf, convert the 2nd
586 // op to sf before inserting the vmpy instruction.
587 if (SrcOpcode == Hexagon::V6_vmpy_qf32) {
588 Register SrcOp2Reg = SrcOp2.getReg();
589 MIB = BuildMI(*MBB, *MI, DL, HII->get(Hexagon::V6_vconv_sf_qf32),
590 SrcOp2Reg)
591 .addReg(SrcOp2Reg, getRegState(SrcOp2) | RegState::Kill);
592 LLVM_DEBUG(dbgs() << "\nInserting new instruction before: ";
593 MIB.getInstr()->dump());
594 MIB = BuildMI(*MBB, *MI, DL, HII->get(finalOpcode), DestReg)
595 .addReg(SrcOp1.getReg(), getRegState(SrcOp1))
596 .addReg(SrcOp2Reg, getRegState(SrcOp2));
597 // If Op2 is not killed convert back to qf, since there
598 // are uses for this qf op.
599 if (!SrcOp2.isKill())
600 insertIEEEToQF(&*(++MI->getIterator()), SrcOp2.getReg(), SrcOp2,
601 true /* sf type reg */);
602
603 // if the opcode is mixed type, we use Op2 as first operand
604 // since that takes in qf type. Op1 is taken as second op.
605 } else if (finalOpcode == Hexagon::V6_vadd_qf16_mix ||
606 finalOpcode == Hexagon::V6_vadd_qf32_mix ||
607 finalOpcode == Hexagon::V6_vmpy_qf16_mix_hf ||
608 finalOpcode == Hexagon::V6_vmpy_qf32_mix_hf) {
609 MIB = BuildMI(*MBB, *MI, DL, HII->get(finalOpcode), DestReg)
610 .addReg(SrcOp2.getReg(), getRegState(SrcOp2))
611 .addReg(SrcOp1.getReg(), getRegState(SrcOp1));
612
613 // Subtracting is not associative, so if Op1 is sf/hf type and
614 // Op2 is qf type, we cannot interchange the operands.
615 // For v79, we convert Op2 to IEEE and use the non-mix type
616 // instruction for the subtraction.
617 // For v81, we have an appropiate opcode with vsub(sf/hf, qf) type
618 } else if ((SrcOpcode == Hexagon::V6_vsub_qf32 ||
619 SrcOpcode == Hexagon::V6_vsub_qf16) &&
620 HST->useHVXV79Ops()) {
621 Changed |= HandleSub(SrcOpcode);
622
623 } else {
624 MIB = BuildMI(*MBB, *MI, DL, HII->get(finalOpcode), DestReg)
625 .addReg(SrcOp1.getReg(), getRegState(SrcOp1))
626 .addReg(SrcOp2.getReg(), getRegState(SrcOp2));
627 }
628 LLVM_DEBUG(dbgs() << "\nInserting new instruction: ";
629 MIB.getInstr()->dump());
630 Changed |= true;
631 } else if (Pair.first == false && Pair.second == true) {
632
633 auto finalOpcode = getreplacedQFOpcode(SrcOpcode, false, true);
634 // If 2nd op is sf, first op is qf, convert the 1st
635 // op to sf before inserting the vmpy instruction.
636 if (SrcOpcode == Hexagon::V6_vmpy_qf32) {
637 Register SrcOp1Reg = SrcOp1.getReg();
638 MIB = BuildMI(*MBB, *MI, DL, HII->get(Hexagon::V6_vconv_sf_qf32),
639 SrcOp1Reg)
640 .addReg(SrcOp1Reg, getRegState(SrcOp1) | RegState::Kill);
641 LLVM_DEBUG(dbgs() << "\nInserting new instruction before: ";
642 MIB.getInstr()->dump());
643 MIB = BuildMI(*MBB, *MI, DL, HII->get(finalOpcode), DestReg)
644 .addReg(SrcOp1Reg, getRegState(SrcOp1))
645 .addReg(SrcOp2.getReg(), getRegState(SrcOp2));
646 LLVM_DEBUG(dbgs() << "\nInserting new instruction: ";
647 MIB.getInstr()->dump());
648 // If Op1 is not killed convert back to qf, since there
649 // are uses for this qf op.
650 if (!SrcOp1.isKill())
651 insertIEEEToQF(&*(++MI->getIterator()), SrcOp1.getReg(), SrcOp1,
652 true /*sf type reg*/);
653 } else {
654
655 MIB = BuildMI(*MBB, *MI, DL, HII->get(finalOpcode), DestReg)
656 .addReg(SrcOp1.getReg(), getRegState(SrcOp1))
657 .addReg(SrcOp2.getReg(), getRegState(SrcOp2));
658 LLVM_DEBUG(dbgs() << "\nInserting new instruction: ";
659 MIB.getInstr()->dump());
660 }
661 Changed |= true;
662 } else {
663 // Both the operands of this instructions are valid, so no use of
664 // this instruction is to be modified. We need to remove this
665 // instruction from the action map QFUsesMap.
666 eraseList.push_back(MI);
667 }
668 }
669
670 for (MachineInstr *delMI : eraseList)
671 QFUsesMap.erase(delMI);
672
673 return Changed;
674}
675
676// Insert a new instruction.
677void HexagonPostRAHandleQFP::insertInstr(MachineInstr *MI, unsigned MIOpcode,
678 unsigned SrcReg, unsigned DstReg,
679 RegState Flags) {
680
681 MachineInstrBuilder MIB;
682 MachineBasicBlock *MBB = MI->getParent();
683 DebugLoc DL = MI->getDebugLoc();
685 auto MINext = ++MI->getIterator();
686 if (++MIt == MBB->end())
687 MIB = BuildMI(MBB, DL, HII->get(MIOpcode), DstReg).addReg(SrcReg, Flags);
688 else
689 MIB = BuildMI(*MBB, MINext, DL, HII->get(MIOpcode), DstReg)
690 .addReg(SrcReg, Flags);
691 LLVM_DEBUG(dbgs() << "\t\tInserting after conv: "; MIB.getInstr()->dump());
692}
693
694// Find an available vector register to store 0x0. We have reserved vector
695// register v30 to be exempted from being used during register allocation
696// for this purpose.
697MCPhysReg HexagonPostRAHandleQFP::findAllocatableReg(MachineInstr *MI) const {
698 LLVM_DEBUG(dbgs() << "\tUsing V30 register to store a vector of zeroes!");
699 return Hexagon::V30;
700}
701
702// Insert qf = sf/hf conversions before non-saturating instructions
703bool HexagonPostRAHandleQFP::HandleNonSatInstr() {
704
705 for (auto It : QFNonSatMIs) {
706 MachineInstr *MI = It.first;
707 auto MIOpcode = MI->getOpcode();
708 auto Op = MI->getOperand(1);
709 Register DefReg = Op.getReg();
710 LLVM_DEBUG(dbgs() << "Analyzing convert instruction: "; MI->dump());
711 // Handle hf = qf16.
712 // Handle f8 = qf16
713 if (MIOpcode == Hexagon::V6_vconv_hf_qf16 ||
714 MIOpcode == Hexagon::V6_vconv_f8_qf16) {
715
716 insertIEEEToQF(MI, DefReg, Op);
717 // TODO Check if there are any reaching def which is qf generating type.
718 // That op should be converted to sf/hf
719 if (!Op.isKill())
720 insertInstr(MI, Hexagon::V6_vconv_hf_qf16, DefReg, DefReg,
721 getRegState(Op) | RegState::Kill);
722
723 // Handle hf = qf.qf.
724 // Handle bf = qf.qf
725 } else if (MIOpcode == Hexagon::V6_vconv_hf_qf32 ||
726 MIOpcode == Hexagon::V6_vconv_bf_qf32) {
727 Register DefLo = HRI->getSubReg(DefReg, Hexagon::vsub_lo);
728 Register DefHi = HRI->getSubReg(DefReg, Hexagon::vsub_hi);
729
730 if (It.second == ConvOperand::HiLo) {
731 insertIEEEToQF(MI, DefLo, Op, true /* sf type */);
732 insertIEEEToQF(MI, DefHi, Op, true /* sf type */);
733
734 // Check which subregister is live and convert it
735 // and according insert conversion for that subreg
736 auto KillState = SubRegKillSet[MI];
737 if (!KillState.first)
738 insertInstr(MI, Hexagon::V6_vconv_sf_qf32, DefHi, DefHi,
739 getRegState(Op) | RegState::Kill);
740
741 if (!KillState.second)
742 insertInstr(MI, Hexagon::V6_vconv_sf_qf32, DefLo, DefLo,
743 getRegState(Op) | RegState::Kill);
744
745 } else if (It.second == ConvOperand::Hi) {
746 insertIEEEToQF(MI, DefHi, Op, true /* sf type */);
747 if (!Op.isKill())
748 insertInstr(MI, Hexagon::V6_vconv_sf_qf32, DefHi, DefHi,
749 getRegState(Op) | RegState::Kill);
750
751 } else { // It.second == ConvOperand::Lo
752 insertIEEEToQF(MI, DefLo, Op, true /* sf type */);
753 if (!Op.isKill())
754 insertInstr(MI, Hexagon::V6_vconv_sf_qf32, DefLo, DefLo,
755 getRegState(Op) | RegState::Kill);
756 }
757 // Handle sf = qf32.
758 } else if (MIOpcode == Hexagon::V6_vconv_sf_qf32) {
759 insertIEEEToQF(MI, DefReg, Op, true /* sf type */);
760 if (!Op.isKill())
761 insertInstr(MI, Hexagon::V6_vconv_sf_qf32, DefReg, DefReg,
762 getRegState(Op) | RegState::Kill);
763
764 } else {
765 llvm_unreachable("Unhandled non-saturating instruction!");
766 }
767 }
768
769 if (QFNonSatMIs.empty())
770 return false;
771 return true;
772}
773
774// Calculates the liveness of subregisters (whether killed or not)
775// when double register is used. This is necessary because RDF
776// carries liveness of the superreg and not the subregisters individually
777void HexagonPostRAHandleQFP::collectLivenessForSubregs(
778 NodeAddr<UseNode *> &UsedNode) {
779 RegisterRef UR = UsedNode.Addr->getRegRef(*DFG);
780 NodeAddr<StmtNode *> UseStmt = UsedNode.Addr->getOwner(*DFG);
781 MachineInstr *UseInstr = UseStmt.Addr->getCode();
782 auto UseOp = UseInstr->getOperand(1);
783 Register UseDefLo = HRI->getSubReg(UseOp.getReg(), Hexagon::vsub_lo);
784 Register UseDefHi = HRI->getSubReg(UseOp.getReg(), Hexagon::vsub_hi);
785
786 NodeSet Visited, Defs;
787 bool isHiSubRegKilled = true, isLoSubRegKilled = true;
788 const auto &P = LV->getAllReachingDefsRec(UR, UsedNode, Visited, Defs);
789
790 if (!P.second)
791 return;
792
793 for (auto RD : P.first) {
794 NodeAddr<DefNode *> RegDef = DFG->addr<DefNode *>(RD);
795 Register RR = RegDef.Addr->getRegRef(*DFG).Id;
796 if (HRI->isFakeReg(RR))
797 continue;
798 NodeAddr<StmtNode *> RegStmt = RegDef.Addr->getOwner(*DFG);
799 MachineInstr *ReachDefInstr = RegStmt.Addr->getCode();
800 if (ReachDefInstr == nullptr)
801 continue;
802
803 // If the reaching def is WReg, then the kill flag in the use is correct
804 // since there is no subreg
805 Register DefReg = ReachDefInstr->getOperand(0).getReg();
806 if (Hexagon::HvxWRRegClass.contains(DefReg)) {
807 if (!UseOp.isKill())
808 isHiSubRegKilled = isLoSubRegKilled = false;
809
810 // If the reaching ref is VReg, the liveness might be different between
811 // each of the subreg. Handle them individually.
812 // Find the other uses after this use for the reaching def. If it exists,
813 // the subregister is live after the use.
814 // NOTE: Assumption: The uses are in order in RDF.
815 } else {
816 NodeSet UseSet;
817 getAllRealUses(RegDef, UseSet, LV, DFG);
818 for (auto UIntr : UseSet) {
819 NodeAddr<UseNode *> UA = DFG->addr<UseNode *>(UIntr);
820 NodeAddr<StmtNode *> UseStmt = UA.Addr->getOwner(*DFG);
821 MachineInstr *UseMI = UseStmt.Addr->getCode();
822 if (UseMI == nullptr)
823 continue;
824 // When we reach the use set a flag to see if there are other uses
825 // after this. If yes, then the register is not killed.
826 if (UseMI == UseInstr)
827 continue;
828 if (HII->isMIBefore(UseInstr, UseMI) && DefReg == UseDefLo) {
829 isLoSubRegKilled = false;
830 break;
831 }
832 if (HII->isMIBefore(UseInstr, UseMI) && DefReg == UseDefHi) {
833 isHiSubRegKilled = false;
834 break;
835 }
836 }
837 }
838 }
839 SubRegKillSet[UseInstr] = std::make_pair(isHiSubRegKilled, isLoSubRegKilled);
840}
841
842// Store all refill instructions.
843void HexagonPostRAHandleQFP::collectQFPStackRefill(
844 NodeAddr<StmtNode *> *StNode) {
845 NodeAddr<DefNode *> DfNode =
846 StNode->Addr->members_if(DFG->IsDef, *DFG).front();
847 MachineInstr *MI = StNode->Addr->getCode();
848 // Check if operand to this instruction is a frame index.
849 const MachineOperand &OpFI = MI->getOperand(1);
850 if (!OpFI.isFI())
851 return;
852
853 // LLVM_DEBUG(dbgs() << "\n[Stack Refill]: Collecting: "; MI->dump());
854 RefillMIs.push_back(DfNode);
855}
856
857// Iterate over the uses of the qf generating instruction in RDG graph
858// If we get a qf to IEEE convert instruction, add it to a list.
859void HexagonPostRAHandleQFP::collectConvQFInstr(NodeAddr<DefNode *> &RegDef) {
860
861 NodeSet UseSet;
862 NodeAddr<StmtNode *> DefStmt = RegDef.Addr->getOwner(*DFG);
863 MachineInstr *DefInstr = DefStmt.Addr->getCode();
864 getAllRealUses(RegDef, UseSet, LV, DFG);
865 for (auto UI : UseSet) {
866 NodeAddr<UseNode *> UA = DFG->addr<UseNode *>(UI);
867 if (UA.Addr->getFlags() & NodeAttrs::PhiRef)
868 continue;
869 NodeAddr<StmtNode *> UseStmt = UA.Addr->getOwner(*DFG);
870 MachineInstr *QFConvInstr = UseStmt.Addr->getCode();
871 if (std::find(QFNonSatInstr.begin(), QFNonSatInstr.end(),
872 QFConvInstr->getOpcode()) != QFNonSatInstr.end()) {
873
874 // The use is a double register type. But the def can be hi/lo or double
875 // type. So conversion needs to be inserted only for the type
876 // which is in IEEE form.
877 auto UseReg = QFConvInstr->getOperand(1).getReg();
878 auto DefReg = DefInstr->getOperand(0).getReg();
879 if (Hexagon::HvxWRRegClass.contains(UseReg)) {
880
881 collectLivenessForSubregs(UA);
882 unsigned Op = ConvOperand::Undefined;
883 if (QFNonSatMIs.contains(QFConvInstr))
884 Op = QFNonSatMIs[QFConvInstr];
885
886 // Def is double type
887 if (Hexagon::HvxWRRegClass.contains(DefReg))
888 Op = ConvOperand::HiLo;
889 // Def is lo of double type
890 else if (DefReg == HRI->getSubReg(UseReg, Hexagon::vsub_lo))
891 Op |= ConvOperand::Lo;
892 // Def is hi of double type
893 else
894 Op |= ConvOperand::Hi;
895 QFNonSatMIs[QFConvInstr] = Op;
896 } else // for other def-use, BothOp is used as default
897 QFNonSatMIs[QFConvInstr] = ConvOperand::HiLo;
898
899 IgnoreInsertConvList.insert(DefInstr);
900 LLVM_DEBUG(std::string OpType = ""; switch (QFNonSatMIs[QFConvInstr]) {
901 case ConvOperand::HiLo:
902 OpType = "HiLo Op";
903 break;
904 case ConvOperand::Lo:
905 OpType = "Lo Op";
906 break;
907 case ConvOperand::Hi:
908 OpType = "Hi Op";
909 break;
910 default:
911 OpType = "Undefined";
912 } dbgs() << "Collecting convert instruction with type "
913 << OpType << " : ";
914 QFConvInstr->dump());
915 }
916 }
917}
918
919// Check if the COPY statements use came from a def which generates
920// a qf type. If yes, collect it in a vector. Also, collect copies
921// with reaching def other copies (nested copies).
922void HexagonPostRAHandleQFP::collectCopies(NodeAddr<StmtNode *> *StNode) {
923
924 NodeAddr<DefNode *> CopyDef =
925 StNode->Addr->members_if(DFG->IsDef, *DFG).front();
926 MachineInstr *CopyInstr = StNode->Addr->getCode();
927 LLVM_DEBUG(dbgs() << "\nAnalyzing copy: "; StNode->Addr->getCode()->dump());
928
929 for (NodeAddr<UseNode *> UA : StNode->Addr->members_if(DFG->IsUse, *DFG)) {
930 RegisterRef UR = UA.Addr->getRegRef(*DFG);
931 NodeSet Visited, Defs;
932 const auto &P = LV->getAllReachingDefsRec(UR, UA, Visited, Defs);
933 if (!P.second) {
934 LLVM_DEBUG({
935 dbgs() << "*** Unable to collect all reaching defs for use ***\n"
936 << PrintNode<UseNode *>(UA, *DFG) << '\n';
937 });
938 continue;
939 }
940
941 // Note: there can be multiple reaching defs of the copy
942 for (auto RD : P.first) {
943 NodeAddr<DefNode *> RegDef = DFG->addr<DefNode *>(RD);
944 Register RR = RegDef.Addr->getRegRef(*DFG).Id;
945 if (HRI->isFakeReg(RR))
946 continue;
947 NodeAddr<StmtNode *> RegStmt = RegDef.Addr->getOwner(*DFG);
948 MachineInstr *ReachDefInstr = RegStmt.Addr->getCode();
949 if (ReachDefInstr == nullptr)
950 continue;
951 LLVM_DEBUG(dbgs() << "\t[Reaching Def]: "; ReachDefInstr->dump());
952
953 // If the reaching def is a COPY,collect it with reg type ieee
954 if (ReachDefInstr->getOpcode() == TargetOpcode::COPY) {
955 auto pairKey = std::make_pair(CopyDef.Id, RegDef.Id);
956 QFCopys[pairKey] = RegType::ieee;
957 continue;
958 }
959
960 // If the reaching def is a qf instr, collect the copy.
961 // reg type is selected based on the op
962 auto RegT = RegType::undefined;
963 if (HII->isQFPInstr(ReachDefInstr)) {
964 if (HII->isQFP32Instr(ReachDefInstr)) {
965 // check whether the copies register is hvxWR or hvxVR type
966 // NOTE: Assumption: A copy's reaching def shall not be 2,
967 // i.e., for each of the subregister.
968 if (Hexagon::HvxWRRegClass.contains(
969 ReachDefInstr->getOperand(0).getReg()))
970 RegT = RegType::qf32_double;
971 else
972 RegT = RegType::qf32;
973 } else if (HII->isQFP16Instr(ReachDefInstr)) {
974 // Check if qf16 instruction outputs double-wide register
975 if (Hexagon::HvxWRRegClass.contains(
976 ReachDefInstr->getOperand(0).getReg())) {
977 RegT = RegType::qf16_double;
978 } else {
979 RegT = RegType::qf16;
980 }
981 }
982 } else {
983 // if the copy involves non-qf vector registers collect it too
984 Register CopyReg = CopyInstr->getOperand(1).getReg();
985 if (Hexagon::HvxWRRegClass.contains(CopyReg) ||
986 Hexagon::HvxVRRegClass.contains(CopyReg))
987 RegT = RegType::ieee;
988 else
989 continue;
990 }
991 auto pairKey = std::make_pair(CopyDef.Id, RegDef.Id);
992 QFCopys[pairKey] = RegT;
993 }
994 }
995}
996
997// Inserts an qf instruction to a list. These instruction
998// values are spilled to the stack.
999void HexagonPostRAHandleQFP::collectQFPStackSpill(
1000 NodeAddr<StmtNode *> *StNode) {
1001
1002 MachineInstr *MI = StNode->Addr->getCode();
1003 LLVM_DEBUG(dbgs() << "\n[Stack Spill]: Analyzing: "; MI->dump());
1004 // Check if operand to this instruction is a frame index.
1005 const MachineOperand &OpFI = MI->getOperand(0);
1006 if (!OpFI.isFI())
1007 return;
1008
1009 // Pre-RegAlloc
1010 //%46:hvxwr = V6_vmpy_qf32_hf %7:hvxvr, %10:hvxvr
1011 // PS_vstorerw_ai %stack.3, 0, %46:hvxwr :: (store (s2048) into %stack.3,
1012 // align 128)
1013 // Post-RegAlloc
1014 // renamable $w4 = V6_vmpy_qf32_hf killed renamable $v1, renamable $v0
1015 // PS_vstorerw_ai %stack.3, 0, renamable $w4 :: (store (s2048) into %stack.3,
1016 // align 128)
1017
1018 if (!MI->getOperand(2).isReg())
1019 return;
1020
1021 // Iterate over the operands of the store instruction to get their reaching
1022 // defs
1023 NodeId QFPDefNode = 0;
1024 for (NodeAddr<UseNode *> UA : StNode->Addr->members_if(DFG->IsUse, *DFG)) {
1025 QFPDefNode = UA.Addr->getReachingDef();
1026
1027 // Get the defining instruction node(s)
1028 NodeAddr<DefNode *> RegDef = DFG->addr<DefNode *>(QFPDefNode);
1029 assert(QFPDefNode != 0 && "Reaching def computation error");
1030 NodeAddr<StmtNode *> RegStmt = RegDef.Addr->getOwner(*DFG);
1031 MachineInstr *ReachDefInstr = RegStmt.Addr->getCode();
1032 if (ReachDefInstr == nullptr)
1033 continue;
1034 LLVM_DEBUG(dbgs() << "[Stack Spill]:\tReaching Def of operand:";
1035 ReachDefInstr->dump());
1036 // Reaching Def cannot be a phi instruction.
1037 if (RegDef.Addr->getFlags() & NodeAttrs::PhiRef)
1038 continue;
1039
1040 if (!HII->isQFPInstr(ReachDefInstr))
1041 continue;
1042
1043 auto RR = RegDef.Addr->getRegRef(*DFG).Id;
1044 if (HRI->isFakeReg(RR))
1045 continue;
1046
1047 LLVM_DEBUG(dbgs() << "Found a QFPStackSpill via \n"; MI->dump();
1048 dbgs() << "The corresponding XQF instruction is:\n";
1049 ReachDefInstr->dump());
1050
1051 // Collect the spills.
1052 SpillMIs.push_back(std::make_pair(MI, RegDef));
1053 }
1054}
1055
1056// Find the uses of qf generating instructions and conditionally add them
1057// to a list.
1058void HexagonPostRAHandleQFP::collectQFUses(NodeAddr<DefNode *> RegDef,
1059 MachineInstr *DefMI) {
1060
1061 NodeSet UseSet;
1062 LLVM_DEBUG(dbgs() << " Finding uses of: "; DefMI->dump(););
1063 getAllRealUses(RegDef, UseSet, LV, DFG);
1064
1065 for (auto UI : UseSet) {
1066 NodeAddr<UseNode *> UA = DFG->addr<UseNode *>(UI);
1067 if (UA.Addr->getFlags() & NodeAttrs::PhiRef)
1068 continue;
1069 NodeAddr<StmtNode *> UseStmt = UA.Addr->getOwner(*DFG);
1070 MachineInstr *UseMI = UseStmt.Addr->getCode();
1071 LLVM_DEBUG(dbgs() << "\t\t\t[Reached Use of QF operand]: "; UseMI->dump());
1072
1073 Register UsedReg = UA.Addr->getRegRef(*DFG).Id;
1074 if (QFPSatInstsMap.find(UseMI->getOpcode()) != QFPSatInstsMap.end()) {
1075 if (PossibleMultiReachDefs.count(UseStmt.Id) == 0) {
1076 PossibleMultiReachDefs.insert(UseStmt.Id);
1077 LLVM_DEBUG(dbgs() << "\n[Collect instr with possible multidef]:";
1078 UseMI->dump());
1079 }
1080 conditionallyInsert(*UseMI, UsedReg);
1081 }
1082 }
1083}
1084
1085// Process the list which can have multiple definitions. A possible case
1086// can be reaching defs to be a copy and a qf-generating instr respectively.
1087// Only handle the qf-generating instruction by inserting convert to sf/hf
1088// after it. Additionally, then handle the reached uses of this reaching
1089// def since the type has changed to sf/hf from qf after the conversion.
1090bool HexagonPostRAHandleQFP::HandleMultiReachingDefs() {
1091
1092 bool Changed = false;
1093 // Note: It may seem this loop can further add to PossibleMultiReachDefs.
1094 // But it is not expected to since if any instruction has multiple
1095 // definitions it should already be present in it.
1096 for (auto It : PossibleMultiReachDefs) {
1097 NodeAddr<StmtNode *> Stmt = DFG->addr<StmtNode *>(It);
1098 MachineInstr *Instr = Stmt.Addr->getCode();
1099 // get the op type for the original instruction.
1100 // True is sf/hf, false is qf
1101 auto Pair = QFUsesMap[Instr];
1102
1103 unsigned short UseNo = 1;
1104 // Iterate over the operands
1105 for (NodeAddr<UseNode *> UA : Stmt.Addr->members_if(DFG->IsUse, *DFG)) {
1106
1107 // If the type is qf for the operand,
1108 // we skip since there is no scope for mismatch
1109 if ((UseNo == 1 && Pair.first == false) ||
1110 (UseNo == 2 && Pair.second == false)) {
1111 ++UseNo;
1112 continue;
1113 }
1114
1115 RegisterRef UR = UA.Addr->getRegRef(*DFG);
1116 NodeSet Visited, Defs;
1117 const auto &P = LV->getAllReachingDefsRec(UR, UA, Visited, Defs);
1118 if (!P.second) {
1119 LLVM_DEBUG({
1120 dbgs() << "*** Unable to collect all reaching defs for use ***\n"
1121 << PrintNode<UseNode *>(UA, *DFG) << '\n';
1122 });
1123 continue;
1124 }
1125
1126 // Iterate over the reaching defs and process the ones which
1127 // generate qf. Ignore the ones which have already been handled
1128 for (auto RD : P.first) {
1129 NodeAddr<DefNode *> RegDef = DFG->addr<DefNode *>(RD);
1130
1131 // Ignore fake reaches
1132 auto RR = RegDef.Addr->getRegRef(*DFG).Id;
1133 if (HRI->isFakeReg(RR))
1134 continue;
1135
1136 NodeAddr<StmtNode *> RegStmt = RegDef.Addr->getOwner(*DFG);
1137 MachineInstr *ReachDefInstr = RegStmt.Addr->getCode();
1138
1139 if (ReachDefInstr == nullptr)
1140 continue;
1141
1142 if (!HII->isQFPInstr(ReachDefInstr))
1143 continue;
1144 if (IgnoreInsertConvList.find(ReachDefInstr) !=
1145 IgnoreInsertConvList.end())
1146 continue;
1147 LLVM_DEBUG(dbgs() << "[Multidef] Handling reaching def:";
1148 ReachDefInstr->dump());
1149
1150 auto *MBB = ReachDefInstr->getParent();
1151 auto &dl = ReachDefInstr->getDebugLoc();
1152 auto NextReachMI = ++ReachDefInstr->getIterator();
1153 auto DefOp = ReachDefInstr->getOperand(0);
1154 Register OpReg = DefOp.getReg();
1155 MachineInstrBuilder MIB;
1156
1157 // For double vector regs, two conversions are inserted. Single
1158 // conversion for qf32 type
1159 if (HII->isQFP32Instr(ReachDefInstr)) {
1160 // if the reaching def is a qf double type
1161 if (Hexagon::HvxWRRegClass.contains(
1162 ReachDefInstr->getOperand(0).getReg())) {
1163 Register RegLo = HRI->getSubReg(OpReg, Hexagon::vsub_lo);
1164 Register RegHi = HRI->getSubReg(OpReg, Hexagon::vsub_hi);
1165 MIB = BuildMI(*MBB, NextReachMI, dl,
1166 HII->get(Hexagon::V6_vconv_sf_qf32), RegLo)
1167 .addReg(RegLo, RegState::Renamable | RegState::Kill);
1168 LLVM_DEBUG(dbgs() << "[MultiDef] Inserting convert instruction: ";
1169 MIB.getInstr()->dump());
1170 MIB = BuildMI(*MBB, NextReachMI, dl,
1171 HII->get(Hexagon::V6_vconv_sf_qf32), RegHi)
1172 .addReg(RegHi, RegState::Renamable | RegState::Kill);
1173 } else { // If the reaching def is a qf type
1174 MIB = BuildMI(*MBB, NextReachMI, dl,
1175 HII->get(Hexagon::V6_vconv_sf_qf32), OpReg)
1176 .addReg(OpReg, RegState::Renamable | RegState::Kill);
1177 }
1178 }
1179 if (HII->isQFP16Instr(ReachDefInstr)) {
1180 MIB = BuildMI(*MBB, NextReachMI, dl,
1181 HII->get(Hexagon::V6_vconv_hf_qf16), OpReg)
1182 .addReg(OpReg, RegState::Renamable | RegState::Kill);
1183 }
1184 LLVM_DEBUG(dbgs() << "[MultiDef] Inserting convert instruction: ";
1185 MIB.getInstr()->dump(); dbgs() << "\tafter instruction: ";
1186 ReachDefInstr->dump());
1187
1188 // find the uses of the newly transformed to sf/hf and handle
1189 // accordingly. Uses can be vmul/vadd/etc. types or converts which take
1190 // in qf types.
1191 collectQFUses(RegDef, ReachDefInstr);
1192 collectConvQFInstr(RegDef);
1193 IgnoreInsertConvList.insert(ReachDefInstr);
1194 Changed = true;
1195 }
1196 UseNo++;
1197 }
1198 }
1199 return Changed;
1200}
1201
1202bool HexagonPostRAHandleQFP::HandleConvertToQfCopies() {
1203 if (ConvertToQfCopies.empty())
1204 return false;
1205
1206 LLVM_DEBUG(
1207 dbgs() << "\n*** Inserting convert to qf for selected copies ***\n");
1208
1209 // Any reached use of the copy should not already be collected to be
1210 // converted to IEEE. If present, it means that the reached use has
1211 // other reaching def with type IEEE, other than this copy.
1212 auto CanTransform = [&](MachineInstr *MI, unsigned OpNo) -> bool {
1213 if (QFUsesMap.find(MI) != QFUsesMap.end()) {
1214 auto Entry = QFUsesMap[MI];
1215 if (OpNo == 1 && Entry.first == true)
1216 return false;
1217 if (OpNo == 2 && Entry.second == true)
1218 return false;
1219 }
1220 return true;
1221 };
1222
1223 for (auto It : ConvertToQfCopies) {
1224 NodeSet UseSet;
1225 getAllRealUses(It.second.first, UseSet, LV, DFG);
1226
1227 bool transform = true;
1228 for (auto UI : UseSet) {
1229 NodeAddr<UseNode *> UA = DFG->addr<UseNode *>(UI);
1230 if (UA.Addr->getFlags() & NodeAttrs::PhiRef)
1231 continue;
1232 NodeAddr<StmtNode *> UseStmt = UA.Addr->getOwner(*DFG);
1233 MachineInstr *UseMI = UseStmt.Addr->getCode();
1234 unsigned OpNo = UA.Addr->getOp().getOperandNo();
1235
1236 if (!CanTransform(UseMI, OpNo)) {
1237 transform = false;
1238 break;
1239 }
1240 }
1241
1242 if (transform) {
1243
1244 LLVM_DEBUG(dbgs() << "\n[HandleConvertToQfCopies]\tProcessing Copy:";
1245 It.first->dump());
1246 auto CopyOp = It.first->getOperand(0);
1247 auto NextMIIter = std::next(It.first->getIterator());
1248 switch (It.second.second) {
1249 case RegType::qf32_double: {
1250 Register DefLo = HRI->getSubReg(CopyOp.getReg(), Hexagon::vsub_lo);
1251 Register DefHi = HRI->getSubReg(CopyOp.getReg(), Hexagon::vsub_hi);
1252 insertIEEEToQF(&*NextMIIter, DefLo, CopyOp, /*is32bit=*/true);
1253 insertIEEEToQF(&*NextMIIter, DefHi, CopyOp, /*is32bit=*/true);
1254 break;
1255 }
1256 case RegType::qf16_double: {
1257 Register DefLo = HRI->getSubReg(CopyOp.getReg(), Hexagon::vsub_lo);
1258 Register DefHi = HRI->getSubReg(CopyOp.getReg(), Hexagon::vsub_hi);
1259 insertIEEEToQF(&*NextMIIter, DefLo, CopyOp, /*is32bit=*/false);
1260 insertIEEEToQF(&*NextMIIter, DefHi, CopyOp, /*is32bit=*/false);
1261 break;
1262 }
1263 case RegType::qf16:
1264 insertIEEEToQF(&*NextMIIter, CopyOp.getReg(), CopyOp,
1265 /*is32bit=*/false);
1266 break;
1267 case RegType::qf32:
1268 insertIEEEToQF(&*NextMIIter, CopyOp.getReg(), CopyOp, /*is32bit=*/true);
1269 break;
1270 default:
1271 break;
1272 }
1273 } else {
1274 collectQFUses(It.second.first, It.first);
1275 collectConvQFInstr(It.second.first);
1276 }
1277 }
1278 return true;
1279}
1280
1281bool HexagonPostRAHandleQFP::HandleReachDefOfCopies() {
1282 if (ReachDefOfCopies.empty())
1283 return false;
1284
1285 MachineInstrBuilder MIB;
1286 for (auto It : ReachDefOfCopies) {
1287 auto *MBB = It.first->getParent();
1288 auto &dl = It.first->getDebugLoc();
1289 auto NextMI = ++(It.first)->getIterator();
1290 auto RegOp = It.first->getOperand(0);
1291 Register OpReg = RegOp.getReg();
1292
1293 if (It.second == RegType::qf32)
1294 MIB =
1295 BuildMI(*MBB, NextMI, dl, HII->get(Hexagon::V6_vconv_sf_qf32), OpReg)
1296 .addReg(OpReg, RegState::Renamable | RegState::Kill);
1297 else if (It.second == RegType::qf16)
1298 MIB =
1299 BuildMI(*MBB, NextMI, dl, HII->get(Hexagon::V6_vconv_hf_qf16), OpReg)
1300 .addReg(OpReg, RegState::Renamable | RegState::Kill);
1301 else if (It.second == RegType::qf32_double) {
1302 Register RegLo = HRI->getSubReg(OpReg, Hexagon::vsub_lo);
1303 Register RegHi = HRI->getSubReg(OpReg, Hexagon::vsub_hi);
1304 MIB =
1305 BuildMI(*MBB, NextMI, dl, HII->get(Hexagon::V6_vconv_sf_qf32), RegLo)
1306 .addReg(RegLo, RegState::Renamable | RegState::Kill);
1307 LLVM_DEBUG(dbgs() << "Inserting convert instruction: ";
1308 MIB.getInstr()->dump());
1309 MIB =
1310 BuildMI(*MBB, NextMI, dl, HII->get(Hexagon::V6_vconv_sf_qf32), RegHi)
1311 .addReg(RegHi, RegState::Renamable | RegState::Kill);
1312 } else if (It.second == RegType::qf16_double) {
1313 Register RegLo = HRI->getSubReg(OpReg, Hexagon::vsub_lo);
1314 Register RegHi = HRI->getSubReg(OpReg, Hexagon::vsub_hi);
1315 MIB =
1316 BuildMI(*MBB, NextMI, dl, HII->get(Hexagon::V6_vconv_hf_qf16), RegLo)
1317 .addReg(RegLo, RegState::Renamable | RegState::Kill);
1318 LLVM_DEBUG(dbgs() << "Inserting convert instruction: ";
1319 MIB.getInstr()->dump());
1320 MIB =
1321 BuildMI(*MBB, NextMI, dl, HII->get(Hexagon::V6_vconv_hf_qf16), RegHi)
1322 .addReg(RegHi, RegState::Renamable | RegState::Kill);
1323 }
1324 LLVM_DEBUG(dbgs() << "Inserting convert instruction: ";
1325 MIB.getInstr()->dump(); dbgs() << "\tafter instruction: ";
1326 It.first->dump());
1327 }
1328 return true;
1329}
1330
1331HexagonPostRAHandleQFP::RegType
1332HexagonPostRAHandleQFP::HasQfUses(NodeAddr<DefNode *> CopyDef,
1333 MachineInstr *CopyMI) {
1334 NodeSet UseSet;
1335 getAllRealUses(CopyDef, UseSet, LV, DFG);
1336
1337 if (UseSet.size() == 0)
1338 return RegType::undefined;
1339
1340 bool hasQf16Use = false;
1341 bool hasQf32Use = false;
1342
1343 LLVM_DEBUG(dbgs() << "[COPY]\nUses of the copy are: ");
1344 for (auto UI : UseSet) {
1345 NodeAddr<UseNode *> UA = DFG->addr<UseNode *>(UI);
1346 if (UA.Addr->getFlags() & NodeAttrs::PhiRef)
1347 continue;
1348 NodeAddr<StmtNode *> UseStmt = UA.Addr->getOwner(*DFG);
1349 MachineInstr *UseMI = UseStmt.Addr->getCode();
1350 unsigned OpNo = UA.Addr->getOp().getOperandNo();
1351
1352 LLVM_DEBUG(dbgs() << "\nCopy's use: "; UseMI->dump());
1353 // Any reached use should not be a non-qf instruction
1354 if (!HII->usesQFOperand(UseMI, OpNo))
1355 return RegType::ieee;
1356
1357 // Determine the qf type from the use
1358 if (HII->usesQF16Operand(UseMI, OpNo))
1359 hasQf16Use = true;
1360 else if (HII->usesQF32Operand(UseMI, OpNo))
1361 hasQf32Use = true;
1362
1363 // Any reached use should not already be converted to IEEE.
1364 // If present, it means that the reached use has other reaching def
1365 // other than the copy.
1366 if (QFUsesMap.find(UseMI) != QFUsesMap.end()) {
1367 auto Entry = QFUsesMap[UseMI];
1368 if (OpNo == 1 && Entry.first == true)
1369 return RegType::ieee;
1370 if (OpNo == 2 && Entry.second == true)
1371 return RegType::ieee;
1372 }
1373 }
1374
1375 // Set the output type based on uses
1376 if (hasQf16Use) {
1377 // Check if copy destination is double-wide
1378 if (Hexagon::HvxWRRegClass.contains(CopyMI->getOperand(0).getReg()))
1379 return RegType::qf16_double;
1380 else
1381 return RegType::qf16;
1382 } else if (hasQf32Use) {
1383 if (Hexagon::HvxWRRegClass.contains(CopyMI->getOperand(0).getReg()))
1384 return RegType::qf32_double;
1385 else
1386 return RegType::qf32;
1387 }
1388
1389 return RegType::undefined;
1390}
1391
1392// Go through the collected copies and insert conversion to sf/hf
1393// conditionally *after their reaching defs*. This is done because there
1394// can be mutliple reaching defs of the copies. Also, check for the uses
1395// of the reaching def and handle qf uses too by changing opcode or
1396// inserting converts.
1397// Additionally, check for the uses of the copy
1398// and handle them via changing opcode or inserting converts.
1399bool HexagonPostRAHandleQFP::HandleCopies() {
1400
1401 bool Changed = false;
1402
1403 // If a convert is inserted after a reaching def, add it to ignorelist.
1404 // This is because this reaching def can be reaching def of other copies
1405 // due to non-SSA form.
1406 for (auto It : QFCopys) {
1407
1408 // Get details of the copy node
1409 NodeAddr<DefNode *> CopyNode = DFG->addr<DefNode *>(It.first.first);
1410 NodeAddr<StmtNode *> StNode = CopyNode.Addr->getOwner(*DFG);
1411 [[maybe_unused]] auto *CopyMI = StNode.Addr->getCode();
1412 LLVM_DEBUG(dbgs() << "\nHandling Reaching Defs of COPY: "; CopyMI->dump();
1413 std::string Type; switch (It.second) {
1414 case RegType::qf32_double:
1415 Type = "qf32_double";
1416 break;
1417 case RegType::qf32:
1418 Type = "qf32";
1419 break;
1420 case RegType::qf16:
1421 Type = "qf16";
1422 break;
1423 case RegType::qf16_double:
1424 Type = "qf16_double";
1425 break;
1426 default:
1427 Type = "ieee";
1428 } dbgs() << "\t Type: "
1429 << Type << "\n");
1430
1431 // insert convert to IEEE after the reaching def if it generates qf type
1432 RegType RTy = It.second;
1433 if (RTy != RegType::ieee) {
1434
1435 // get details of the reaching def node
1436 NodeAddr<DefNode *> ReachDefNode = DFG->addr<DefNode *>(It.first.second);
1437 NodeAddr<StmtNode *> StNode = ReachDefNode.Addr->getOwner(*DFG);
1438 auto *ReachingDef = StNode.Addr->getCode();
1439
1440 if (IgnoreInsertConvList.find(ReachingDef) != IgnoreInsertConvList.end())
1441 continue;
1442
1443 // Collect the reaching defs to be processed later.
1444 ReachDefOfCopies.insert(std::make_pair(ReachingDef, RTy));
1445
1446 // Process the reached uses of the reaching def now for
1447 // incorrect usage, since the register type has changed
1448 // following the conversion.
1449 LLVM_DEBUG(dbgs() << "\n[COPY]\tAnalyzing uses of the reaching defs \
1450 of the copy...");
1451 collectQFUses(ReachDefNode, ReachingDef);
1452 collectConvQFInstr(ReachDefNode);
1453 IgnoreInsertConvList.insert(ReachingDef);
1454 Changed = true;
1455 }
1456 }
1457
1458 // Loop through copies with qf uses
1459 for (auto It : QFCopys) {
1460
1461 // Get details of the copy node
1462 NodeAddr<DefNode *> CopyNode = DFG->addr<DefNode *>(It.first.first);
1463 NodeAddr<StmtNode *> StNode = CopyNode.Addr->getOwner(*DFG);
1464 auto *CopyMI = StNode.Addr->getCode();
1465 LLVM_DEBUG(dbgs() << "\nHandling COPY: "; CopyMI->dump());
1466 RegType RTy = It.second;
1467
1468 // Process the reached uses of the copy to find any incorrect
1469 // qf uses. If the copy's uses are all qf types, we need to convert
1470 // its result back to qf
1471 // FIXME: don't include the copy if its the last instruction since
1472 // it is *probably* not possible to insert via BuildMI at the end of BB
1473 RTy = HasQfUses(CopyNode, CopyMI);
1474 if (RTy != RegType::ieee && RTy != RegType::undefined &&
1475 (++CopyMI->getIterator() != CopyMI->getParent()->end())) {
1476 if (!ConvertToQfCopies.contains(CopyMI)) {
1477 ConvertToQfCopies[CopyMI] = std::make_pair(CopyNode, RTy);
1478 LLVM_DEBUG(dbgs() << "\n[ConvertToQfCopies]\tAdded copy: ";
1479 CopyMI->dump(); std::string Type; switch (RTy) {
1480 case RegType::qf32_double:
1481 Type = "qf32_double";
1482 break;
1483 case RegType::qf32:
1484 Type = "qf32";
1485 break;
1486 case RegType::qf16:
1487 Type = "qf16";
1488 break;
1489 case RegType::qf16_double:
1490 Type = "qf16_double";
1491 break;
1492 default:
1493 Type = "ieee";
1494 } dbgs() << "\t Type: "
1495 << Type << "\n");
1496 }
1497 continue;
1498 }
1499 LLVM_DEBUG(dbgs() << "\n[COPY]\tAnalyzing uses of the copy...");
1500 collectQFUses(CopyNode, CopyMI);
1501 collectConvQFInstr(CopyNode);
1502 }
1503
1504 Changed |= HandleReachDefOfCopies();
1505 Changed |= HandleMultiReachingDefs();
1506 Changed |= HandleConvertToQfCopies();
1507
1508 return Changed;
1509}
1510
1511// Inserts a conversion instruction sf/hf = qf for every collected spill.
1512// Uses the same physical register as source and destination of the
1513// conversion. Additionally checks the uses of the register and
1514// conditionally stores them to be handled later.
//
// Each SpillMIs entry pairs a spill store with the def node of the qf value
// it stores. Three store shapes are handled:
//   * PS_vstorerw_ai                      - double register, hi/lo converted
//   * PS_vstorerv_ai fed by a qf32 instr  - V6_vconv_sf_qf32
//   * PS_vstorerv_ai fed by a qf16 instr  - V6_vconv_hf_qf16
// Anything else hits llvm_unreachable.
// Returns true if any spill reached the conversion-insertion step.
1515bool HexagonPostRAHandleQFP::HandleSpills() {
1516
1517 LLVM_DEBUG(dbgs() << "\n[Handling Spill]\n");
1518 bool Changed = false;
1519 for (auto It : SpillMIs) {
1520
 // MI is the spill store; OpC selects which of the three cases applies.
1521 MachineInstr *MI = It.first;
1522 auto OpC = MI->getOpcode();
1523
 // DefMI is the instruction owning the def node collected for this spill.
1524 auto NodeDef = It.second;
1525 NodeAddr<StmtNode *> Stmt = NodeDef.Addr->getOwner(*DFG);
1526 MachineInstr *DefMI = Stmt.Addr->getCode();
 // Operand 2 of the store is the register being spilled.
1527 auto RegOp = MI->getOperand(2);
1528 Register DefR = RegOp.getReg();
1529
1530 // Handles widened qf16/qf32 instructions.
1531 if (OpC == Hexagon::PS_vstorerw_ai) {
1532 if (!Hexagon::HvxWRRegClass.contains(DefR))
1533 assert(false && " Unhandled Vector Register class passed\n");
1534 // Walk through the uses of DefLo and DefHi and if there are QFP
1535 // instructions, the instruction needs to be updated to use sf operands
1536 // instead of qf operands.
1537 collectQFUses(NodeDef, DefMI);
1538
 // A conversion was already arranged for this definition elsewhere.
1539 if (IgnoreInsertConvList.find(DefMI) != IgnoreInsertConvList.end())
1540 continue;
1541
1542 // Collect the reached uses of ReachDefInstr
1543 // which are sf/hf = qf conversion instructions.
1544 collectConvQFInstr(NodeDef);
1545 Register DefLo = HRI->getSubReg(DefR, Hexagon::vsub_lo);
1546 Register DefHi = HRI->getSubReg(DefR, Hexagon::vsub_hi);
1547
1548 // Create two conversion instructions, one each for Hi and Lo, conditionally.
1549 // Liveness is the same as for the store instruction for the register.
1550 // If both are double registers, two insertions are done.
1551 // If only one of the subregs is reaching the store, conversion is done
1552 // for that subreg.
 // NOTE(review): DReg is used below but its declaration is not visible in
 // this listing (original line 1553 is absent) - presumably the register
 // defined by DefMI; confirm against the full source.
1554 if (HII->isQFP16Instr(DefMI)) {
1555 if (DefLo == DReg || Hexagon::HvxWRRegClass.contains(DReg))
1556 insertInstr(DefMI, Hexagon::V6_vconv_hf_qf16, DefLo, DefLo,
1557 getRegState(RegOp) | RegState::Kill);
1558
1559 if (DefHi == DReg || Hexagon::HvxWRRegClass.contains(DReg))
1560 insertInstr(DefMI, Hexagon::V6_vconv_hf_qf16, DefHi, DefHi,
1561 getRegState(RegOp) | RegState::Kill);
1562 } else if (HII->isQFP32Instr(DefMI)) {
1563 if (DefLo == DReg || Hexagon::HvxWRRegClass.contains(DReg))
1564 insertInstr(DefMI, Hexagon::V6_vconv_sf_qf32, DefLo, DefLo,
1565 getRegState(RegOp) | RegState::Kill);
1566
1567 if (DefHi == DReg || Hexagon::HvxWRRegClass.contains(DReg))
1568 insertInstr(DefMI, Hexagon::V6_vconv_sf_qf32, DefHi, DefHi,
1569 getRegState(RegOp) | RegState::Kill);
1570 }
 // Remember the definition so no second conversion is inserted for it.
1571 IgnoreInsertConvList.insert(DefMI);
1572 Changed = true;
1573
1574 // Handles instructions which output qf32 type.
1575 } else if (OpC == Hexagon::PS_vstorerv_ai && HII->isQFP32Instr(DefMI)) {
1576 collectQFUses(NodeDef, DefMI);
1577 if (IgnoreInsertConvList.find(DefMI) != IgnoreInsertConvList.end())
1578 continue;
1579 collectConvQFInstr(NodeDef);
1580
1581 insertInstr(DefMI, Hexagon::V6_vconv_sf_qf32, DefR, DefR,
1582 getRegState(RegOp) | RegState::Kill);
1583
1584 IgnoreInsertConvList.insert(DefMI);
1585 Changed = true;
1586
1587 // Handles instructions which output qf16 type.
1588 } else if (OpC == Hexagon::PS_vstorerv_ai && HII->isQFP16Instr(DefMI)) {
1589 collectQFUses(NodeDef, DefMI);
1590 if (IgnoreInsertConvList.find(DefMI) != IgnoreInsertConvList.end())
1591 continue;
1592 collectConvQFInstr(NodeDef);
1593
1594 insertInstr(DefMI, Hexagon::V6_vconv_hf_qf16, DefR, DefR,
1595 getRegState(RegOp) | RegState::Kill);
1596
1597 IgnoreInsertConvList.insert(DefMI);
1598 Changed = true;
1599 } else {
 // Any other store opcode / producer combination is unexpected here.
1600 LLVM_DEBUG(MI->dump());
1601 llvm_unreachable("This case is not handled. Look above for MI\n");
1602 }
1603 }
1604 return Changed;
1605}
1606
// Pass entry point. Builds the register dataflow graph and liveness for MF,
// collects vector spills, refills and copies, and then runs the handlers in
// this order: spills, refill uses, copies, refills, non-saturating
// instructions. Every instruction still keyed in QFUsesMap at the end is
// erased from its parent. Returns true if any handler reported a change.
1607bool HexagonPostRAHandleQFP::runOnMachineFunction(MachineFunction &MF) {
1608
 // NOTE(review): the condition guarding this early return is not visible in
 // this listing (original line 1609 is absent) - confirm against the full
 // source.
1610 return false;
1611
1612 LLVM_DEBUG(
1613 dbgs() << "\n=== Entering Hexagon Fixup QF spills and refills pass ===\n"
1614 << "Mode: ";
1615 switch (QFloatModeValue) {
1616 case QFloatMode::StrictIEEE:
1617 dbgs() << "Strict IEEE";
1618 break;
1619 case QFloatMode::IEEE:
1620 dbgs() << "IEEE";
1621 break;
1622 case QFloatMode::Lossy:
1623 dbgs() << "Lossy";
1624 break;
1625 default:
1626 dbgs() << "Legacy";
1627 break;
1628 };
1629 dbgs() << "\n";);
1630 bool Changed = false;
1631
 // Nothing to do when the subtarget does not use HVX operations.
1632 auto &_HST = MF.getSubtarget<HexagonSubtarget>();
1633 if (!_HST.useHVXOps())
1634 return false;
1635
1636 HII = _HST.getInstrInfo();
1637
1638 // If the mode is legacy, the function may not contain qf instructions;
1639 // check if this pass is required to run for legacy mode.
1640 if (QFloatModeValue == QFloatMode::Legacy)
1641 if (!HII->hasQFPInstrs(MF))
1642 return false;
1643
1644 HRI = _HST.getRegisterInfo();
1645 MRI = &MF.getRegInfo();
1646 const auto &MDF = getAnalysis<MachineDominanceFrontierWrapperPass>().getMDF();
1647 MachineDominatorTree *MDT =
1648 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
1649 HST = &_HST;
1650
1651 // We need the Register Dataflow Graph (RDG) to calculate reaching
1652 // definitions since the machine code is not in SSA.
1653 // DFG holds the graph on which we iterate for the nodes.
 // G and L are locals; the DFG and LV members point at them for the
 // duration of this call.
1654 DataFlowGraph G(MF, *HII, *HRI, *MDT, MDF);
1655 G.build();
1656 DFG = &G;
1657
1658 Liveness L(*MRI, *DFG);
1659 L.computePhiInfo();
1660 LV = &L;
1661
1662 // Find and save the list of QFP stack spills.
1663 // For refills, store all refill instructions to process conditionally later.
1664 NodeAddr<FuncNode *> FA = DFG->getFunc();
1665 LLVM_DEBUG(dbgs() << "==== [RefMap#]=====:\n "
1666 << Print<NodeAddr<FuncNode *>>(FA, *DFG) << "\n");
1667 for (NodeAddr<BlockNode *> BA : FA.Addr->members(*DFG)) {
1668 for (auto IA : BA.Addr->members(*DFG)) {
1669
 // Only statement nodes are examined.
1670 if (!DFG->IsCode<NodeAttrs::Stmt>(IA))
1671 continue;
1672
1673 // 'SA' holds the statement node which contains the machine instruction.
1674 NodeAddr<StmtNode *> SA = IA;
1675 MachineInstr *I = SA.Addr->getCode();
1676
 // Vector stores go to the spill collector, vector loads to the refill
 // collector, and COPYs to the copy collector.
1677 switch (I->getOpcode()) {
1678 case Hexagon::PS_vstorerw_ai:
1679 case Hexagon::PS_vstorerv_ai:
1680 collectQFPStackSpill(&SA);
1681 break;
1682 case Hexagon::PS_vloadrw_ai:
1683 case Hexagon::PS_vloadrv_ai:
1684 collectQFPStackRefill(&SA);
1685 break;
1686 case TargetOpcode::COPY:
1687 collectCopies(&SA);
1688 break;
1689 default:
1690 break;
1691 }
1692 }
1693 }
1694
1695 // Walk through the spills and insert converts when necessary.
1696 // Additionally, walk through the uses of the converts and
1697 // store them conditionally for later processing.
1698 LLVM_DEBUG(dbgs() << "\nHandling spills....");
1699 Changed |= HandleSpills();
1700 SpillMIs.clear();
1701
1702 // Walk through the uses of the refill instructions.
1703 // Process them if they are used as qf operands.
1704 LLVM_DEBUG(dbgs() << "\nCollecting refills....\n");
1705 for (NodeAddr<DefNode *> DfNode : RefillMIs) {
1706
1707 NodeAddr<StmtNode *> Stmt = DfNode.Addr->getOwner(*DFG);
1708 MachineInstr *DefMI = Stmt.Addr->getCode();
1709 collectQFUses(DfNode, DefMI);
1710 collectConvQFInstr(DfNode);
1711 }
1712 RefillMIs.clear();
1713
1714 LLVM_DEBUG(dbgs() << "\nHandling copies....");
1715 Changed |= HandleCopies();
1716 QFCopys.clear();
1717 PossibleMultiReachDefs.clear();
1718 ReachDefOfCopies.clear();
1719 ConvertToQfCopies.clear();
1720
1721 LLVM_DEBUG(dbgs() << "\n === QF Uses map === "; for (auto It : QFUsesMap) {
1722 dbgs() << "\nInstruction: ";
1723 It.first->dump();
1724 dbgs() << "\t Property: " << It.second.first << " ," << It.second.second;
1725 });
1726
1727 // Insert new opcodes as applicable for the refill uses.
1728 // Delete the original instructions.
1729 Changed |= HandleRefills();
1730
1731 // Handle non-saturating instructions by inserting convert(s) from sf to qf.
1732 Changed |= HandleNonSatInstr();
1733 QFNonSatMIs.clear();
1734 // Cleanup: every instruction keyed in QFUsesMap is erased from its parent
 // block, then the per-function bookkeeping is reset.
1735 for (auto It : QFUsesMap)
1736 It.first->eraseFromParent();
1737 QFUsesMap.clear();
1738 IgnoreInsertConvList.clear();
1739
1740 return Changed;
1741}
1742
1743//===----------------------------------------------------------------------===//
1744// Public Constructor Functions
1745//===----------------------------------------------------------------------===//
// Pass registration under the command-line name "handle-qfp-spills-refills".
// NOTE(review): the description strings of the BEGIN and END macros differ
// ("Post RA" vs "PostRA") - confirm which spelling is intended. The lines
// between the two macros (original lines 1748-1749) are absent from this
// listing.
1746INITIALIZE_PASS_BEGIN(HexagonPostRAHandleQFP, "handle-qfp-spills-refills",
1747 "Hexagon Post RA Handle QFloat", false, false)
1750INITIALIZE_PASS_END(HexagonPostRAHandleQFP, "handle-qfp-spills-refills",
1751 "Hexagon PostRA Handle QFloat", false, false)
1752
1754 return new HexagonPostRAHandleQFP();
1755}
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static Register UseReg(const MachineOperand &MO)
SmallVector< unsigned short, 5 > QFNonSatInstr
cl::opt< bool > DisablePostRAHandleQFloat("disable-handle-qfp", cl::init(false), cl::desc("Disable handling of Qfloat spills/refills after register " "allocation."))
DenseMap< unsigned short, std::pair< bool, bool > > QFPSatInstsMap
cl::opt< QFloatMode > QFloatModeValue
static void getAllRealUses(NodeAddr< DefNode * > DA, NodeSet &UNodeSet, Liveness *L, DataFlowGraph *G, bool comprehensive=false)
IRTranslator LLVM IR MI
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:484
#define LLVM_DEBUG(...)
Definition Debug.h:119
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:275
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool hasQFPInstrs(const MachineFunction &MF) const
bool isQFP32Instr(MachineInstr *MI) const
bool usesQF16Operand(MachineInstr *MI, unsigned Index=0) const
bool isQFP16Instr(MachineInstr *MI) const
bool usesQF32Operand(MachineInstr *MI, unsigned Index=0) const
bool isMIBefore(const MachineInstr *A, const MachineInstr *B) const
bool isQFPInstr(MachineInstr *MI) const
bool usesQFOperand(MachineInstr *MI, unsigned Index=0) const
bool isFakeReg(MCPhysReg Reg) const
Returns true if the given reserved physical register Reg is live across function calls/returns.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
unsigned size() const
bool insert(SUnit *SU)
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void dump() const
Definition Pass.cpp:146
Wrapper class representing virtual and physical registers.
Definition Register.h:20
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition COFF.h:862
initializer< Ty > init(const Ty &Val)
NodeAddr< InstrNode * > Instr
Definition RDFGraph.h:389
Print(const T &, const DataFlowGraph &) -> Print< T >
NodeAddr< StmtNode * > Stmt
Definition RDFGraph.h:391
uint32_t NodeId
Definition RDFGraph.h:262
std::set< NodeId > NodeSet
Definition RDFGraph.h:551
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
RegState
Flags to represent properties of register accesses.
void initializeHexagonPostRAHandleQFPPass(PassRegistry &)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:2026
char & HexagonPostRAHandleQFPID
FunctionPass * createHexagonPostRAHandleQFP()
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
RegState getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
NodeList members_if(Predicate P, const DataFlowGraph &G) const
Definition RDFGraph.h:949
static bool IsDef(const Node BA)
Definition RDFGraph.h:827
static bool IsUse(const Node BA)
Definition RDFGraph.h:832
static bool IsCode(const Node BA)
Definition RDFGraph.h:823
NodeAddr< T > addr(NodeId N) const
Definition RDFGraph.h:692
LLVM_ABI Node getOwner(const DataFlowGraph &G)
Definition RDFGraph.cpp:525
DenseMap< RegisterId, NodeRefSet > RefMap
Definition RDFLiveness.h:59
LLVM_ABI std::pair< NodeSet, bool > getAllReachingDefsRec(RegisterRef RefRR, NodeAddr< RefNode * > RefA, NodeSet &Visited, const NodeSet &Defs)
MachineInstr * getCode() const
Definition RDFGraph.h:638