29 #include "llvm/IR/IntrinsicsHexagon.h"
51 #define DEBUG_TYPE "hexagon-vlcr"
53 STATISTIC(HexagonNumVectorLoopCarriedReuse,
54 "Number of values that were reused from a previous iteration.");
58 cl::desc(
"Maximum distance of loop carried dependences that are handled"),
74 ChainOfDependences Chain;
77 bool isIdentical(DepChain &Other)
const {
80 ChainOfDependences &OtherChain =
Other.getChain();
81 for (
int i = 0;
i <
size(); ++
i) {
82 if (Chain[
i] != OtherChain[
i])
88 ChainOfDependences &getChain() {
104 int iterations()
const {
109 return Chain.front();
125 const ChainOfDependences &CD =
D.Chain;
126 int ChainSize = CD.size();
127 OS <<
"**DepChain Start::**\n";
128 for (
int i = 0;
i < ChainSize -1; ++
i) {
129 OS << *(CD[
i]) <<
" -->\n";
131 OS << *CD[ChainSize-1] <<
"\n";
142 std::map<Instruction *, DepChain *> DepChains;
145 ReuseValue() =
default;
148 Inst2Replace =
nullptr;
149 BackedgeInst =
nullptr;
153 bool isDefined() {
return Inst2Replace !=
nullptr; }
158 OS <<
"** ReuseValue ***\n";
159 OS <<
"Instruction to Replace: " << *(RU.Inst2Replace) <<
"\n";
160 OS <<
"Backedge Instruction: " << *(RU.BackedgeInst) <<
"\n";
164 class HexagonVectorLoopCarriedReuseLegacyPass :
public LoopPass {
168 explicit HexagonVectorLoopCarriedReuseLegacyPass() :
LoopPass(
ID) {
174 return "Hexagon-specific loop carried reuse for HVX vectors";
187 class HexagonVectorLoopCarriedReuse {
189 HexagonVectorLoopCarriedReuse(
Loop *L) : CurLoop(L){};
195 std::set<Instruction *> ReplacedInsts;
197 ReuseValue ReuseCandidate;
200 void findLoopCarriedDeps();
201 void findValueToReuse();
216 "Hexagon-specific predictive commoning for HVX vectors",
228 HexagonVectorLoopCarriedReuse Vlcr(&L);
236 bool HexagonVectorLoopCarriedReuseLegacyPass::runOnLoop(
Loop *L,
240 HexagonVectorLoopCarriedReuse Vlcr(L);
245 if (!CurLoop->getLoopPreheader())
249 if (!CurLoop->getSubLoops().empty())
253 if (CurLoop->getNumBlocks() != 1)
259 bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(
CallInst *
C) {
260 switch (
C->getCalledFunction()->getIntrinsicID()) {
261 case Intrinsic::hexagon_V6_vaddb:
262 case Intrinsic::hexagon_V6_vaddb_128B:
263 case Intrinsic::hexagon_V6_vaddh:
264 case Intrinsic::hexagon_V6_vaddh_128B:
265 case Intrinsic::hexagon_V6_vaddw:
266 case Intrinsic::hexagon_V6_vaddw_128B:
267 case Intrinsic::hexagon_V6_vaddubh:
268 case Intrinsic::hexagon_V6_vaddubh_128B:
269 case Intrinsic::hexagon_V6_vadduhw:
270 case Intrinsic::hexagon_V6_vadduhw_128B:
271 case Intrinsic::hexagon_V6_vaddhw:
272 case Intrinsic::hexagon_V6_vaddhw_128B:
273 case Intrinsic::hexagon_V6_vmaxb:
274 case Intrinsic::hexagon_V6_vmaxb_128B:
275 case Intrinsic::hexagon_V6_vmaxh:
276 case Intrinsic::hexagon_V6_vmaxh_128B:
277 case Intrinsic::hexagon_V6_vmaxw:
278 case Intrinsic::hexagon_V6_vmaxw_128B:
279 case Intrinsic::hexagon_V6_vmaxub:
280 case Intrinsic::hexagon_V6_vmaxub_128B:
281 case Intrinsic::hexagon_V6_vmaxuh:
282 case Intrinsic::hexagon_V6_vmaxuh_128B:
283 case Intrinsic::hexagon_V6_vminub:
284 case Intrinsic::hexagon_V6_vminub_128B:
285 case Intrinsic::hexagon_V6_vminuh:
286 case Intrinsic::hexagon_V6_vminuh_128B:
287 case Intrinsic::hexagon_V6_vminb:
288 case Intrinsic::hexagon_V6_vminb_128B:
289 case Intrinsic::hexagon_V6_vminh:
290 case Intrinsic::hexagon_V6_vminh_128B:
291 case Intrinsic::hexagon_V6_vminw:
292 case Intrinsic::hexagon_V6_vminw_128B:
293 case Intrinsic::hexagon_V6_vmpyub:
294 case Intrinsic::hexagon_V6_vmpyub_128B:
295 case Intrinsic::hexagon_V6_vmpyuh:
296 case Intrinsic::hexagon_V6_vmpyuh_128B:
297 case Intrinsic::hexagon_V6_vavgub:
298 case Intrinsic::hexagon_V6_vavgub_128B:
299 case Intrinsic::hexagon_V6_vavgh:
300 case Intrinsic::hexagon_V6_vavgh_128B:
301 case Intrinsic::hexagon_V6_vavguh:
302 case Intrinsic::hexagon_V6_vavguh_128B:
303 case Intrinsic::hexagon_V6_vavgw:
304 case Intrinsic::hexagon_V6_vavgw_128B:
305 case Intrinsic::hexagon_V6_vavgb:
306 case Intrinsic::hexagon_V6_vavgb_128B:
307 case Intrinsic::hexagon_V6_vavguw:
308 case Intrinsic::hexagon_V6_vavguw_128B:
309 case Intrinsic::hexagon_V6_vabsdiffh:
310 case Intrinsic::hexagon_V6_vabsdiffh_128B:
311 case Intrinsic::hexagon_V6_vabsdiffub:
312 case Intrinsic::hexagon_V6_vabsdiffub_128B:
313 case Intrinsic::hexagon_V6_vabsdiffuh:
314 case Intrinsic::hexagon_V6_vabsdiffuh_128B:
315 case Intrinsic::hexagon_V6_vabsdiffw:
316 case Intrinsic::hexagon_V6_vabsdiffw_128B:
323 bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(
Instruction *
I1,
325 if (!
I1->isSameOperationAs(I2))
332 if (
CallInst *C2 = dyn_cast<CallInst>(I2)) {
333 if (
C1->getCalledFunction() != C2->getCalledFunction())
341 unsigned NumOperands =
I1->getNumOperands();
342 for (
unsigned i = 0;
i < NumOperands; ++
i) {
355 bool HexagonVectorLoopCarriedReuse::canReplace(
Instruction *
I) {
361 case Intrinsic::hexagon_V6_hi:
362 case Intrinsic::hexagon_V6_lo:
363 case Intrinsic::hexagon_V6_hi_128B:
364 case Intrinsic::hexagon_V6_lo_128B:
365 LLVM_DEBUG(
dbgs() <<
"Not considering for reuse: " << *II <<
"\n");
371 void HexagonVectorLoopCarriedReuse::findValueToReuse() {
372 for (
auto *
D : Dependences) {
373 LLVM_DEBUG(
dbgs() <<
"Processing dependence " << *(
D->front()) <<
"\n");
377 <<
".. Skipping because number of iterations > than the limit\n");
381 PHINode *PN = cast<PHINode>(
D->front());
383 int Iters =
D->iterations();
386 <<
" can be reused\n");
392 if (
User->getParent() !=
BB)
394 if (ReplacedInsts.count(
User)) {
396 <<
" has already been replaced. Skipping...\n");
399 if (isa<PHINode>(
User))
401 if (
User->mayHaveSideEffects())
403 if (!canReplace(
User))
406 PNUsers.push_back(
User);
408 LLVM_DEBUG(
dbgs() << PNUsers.size() <<
" use(s) of the PHI in the block\n");
417 for (
Use &U : BEInst->
uses()) {
418 Instruction *BEUser = cast<Instruction>(U.getUser());
422 if (!isEquivalentOperation(
I, BEUser))
425 int NumOperands =
I->getNumOperands();
436 std::map<Instruction *, DepChain *> DepChains;
438 if ((
I &&
I->isCommutative()) || (
C1 && isCallInstCommutative(
C1))) {
440 for (
int OpNo = 0; OpNo < NumOperands; ++OpNo) {
444 for (
int T = 0;
T < NumOperands; ++
T) {
446 Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
447 if (!OpInst && !BEOpInst) {
454 if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))
457 DepChain *
D = getDepChainBtwn(OpInst, BEOpInst, Iters);
461 DepChains[OpInst] =
D;
472 for (
int OpNo = 0; OpNo < NumOperands; ++OpNo) {
486 Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
487 DepChain *
D = getDepChainBtwn(OpInst, BEOpInst, Iters);
490 DepChains[OpInst] =
D;
499 ReuseCandidate.Inst2Replace =
I;
500 ReuseCandidate.BackedgeInst = BEUser;
501 ReuseCandidate.DepChains = DepChains;
502 ReuseCandidate.Iterations = Iters;
505 ReuseCandidate.reset();
509 ReuseCandidate.reset();
512 Value *HexagonVectorLoopCarriedReuse::findValueInBlock(
Value *
Op,
520 void HexagonVectorLoopCarriedReuse::reuseValue() {
522 Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
525 std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;
526 int Iterations = ReuseCandidate.Iterations;
527 BasicBlock *LoopPH = CurLoop->getLoopPreheader();
528 assert(!DepChains.empty() &&
"No DepChains");
529 LLVM_DEBUG(
dbgs() <<
"reuseValue is making the following changes\n");
532 for (
int i = 0;
i < Iterations; ++
i) {
535 for (
int j = 0;
j < NumOperands; ++
j) {
540 DepChain &
D = *DepChains[
I];
544 Value *ValInPreheader = findValueInBlock(
D[
i], LoopPH);
547 InstsInPreheader.push_back(InstInPreheader);
548 InstInPreheader->
setName(Inst2Replace->
getName() +
".hexagon.vlcr");
555 IRB.SetInsertPoint(
BB->getFirstNonPHI());
556 Value *BEVal = BEInst;
558 for (
int i = Iterations-1;
i >=0 ; --
i) {
560 NewPhi = IRB.CreatePHI(InstInPreheader->
getType(), 2);
570 ReplacedInsts.insert(Inst2Replace);
571 ++HexagonNumVectorLoopCarriedReuse;
574 bool HexagonVectorLoopCarriedReuse::doVLCR() {
575 assert(CurLoop->getSubLoops().empty() &&
576 "Can do VLCR on the innermost loop only");
577 assert((CurLoop->getNumBlocks() == 1) &&
578 "Can do VLCR only on single block loops");
580 bool Changed =
false;
583 LLVM_DEBUG(
dbgs() <<
"Working on Loop: " << *CurLoop->getHeader() <<
"\n");
589 findLoopCarriedDeps();
591 if (ReuseCandidate.isDefined()) {
601 void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(
Instruction *
I,
609 if (NumIncomingValues != 2) {
615 if (
BB != CurLoop->getHeader()) {
621 Instruction *BEInst = dyn_cast<Instruction>(BEVal);
624 assert(BEInst &&
"There should be a value over the backedge");
628 if(!PreHdrVal || !isa<Instruction>(PreHdrVal)) {
633 findDepChainFromPHI(BEInst,
D);
637 DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(
Instruction *
I1,
640 for (
auto *
D : Dependences) {
641 if (
D->front() ==
I1 &&
D->back() == I2 &&
D->iterations() == Iters)
647 void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
649 for (
auto I =
BB->begin(),
E =
BB->end();
I !=
E && isa<PHINode>(
I); ++
I) {
650 auto *PN = cast<PHINode>(
I);
651 if (!isa<VectorType>(PN->
getType()))
654 DepChain *
D =
new DepChain();
655 findDepChainFromPHI(PN, *
D);
657 Dependences.insert(
D);
661 LLVM_DEBUG(
dbgs() <<
"Found " << Dependences.size() <<
" dependences\n");
666 return new HexagonVectorLoopCarriedReuseLegacyPass();