42#include "llvm/IR/IntrinsicsHexagon.h"
76#define DEBUG_TYPE "hexagon-lir"
82 cl::desc(
"Disable generation of memcpy in loop idiom recognition"));
86 cl::desc(
"Disable generation of memmove in loop idiom recognition"));
90 "check guarding the memmove."));
94 cl::desc(
"Threshold (in bytes) to perform the transformation, if the "
95 "runtime loop count (mem transfer size) is known at compile-time."));
99 cl::desc(
"Only enable generating memmove in non-nested loops"));
103 cl::desc(
"Enable Hexagon-specific memcpy for volatile destination."));
109 =
"hexagon_memcpy_forward_vp4cp4n2";
121class HexagonLoopIdiomRecognize {
126 : AA(AA), DT(DT), LF(LF), TLI(TLI), SE(SE) {}
139 bool runOnCountableLoop(
Loop *L);
147 bool HasMemcpy, HasMemmove;
150class HexagonLoopIdiomRecognizeLegacyPass :
public LoopPass {
154 explicit HexagonLoopIdiomRecognizeLegacyPass() :
LoopPass(
ID) {
160 return "Recognize Hexagon-specific loop idioms";
185 void addRule(
StringRef N,
const Rule::FuncType &
F) {
186 Rules.push_back(Rule(
N,
F));
190 struct WorkListType {
191 WorkListType() =
default;
193 void push_back(
Value *V) {
195 if (S.insert(V).second)
199 Value *pop_front_val() {
206 bool empty()
const {
return Q.empty(); }
209 std::deque<Value *> Q;
213 using ValueSetType = std::set<Value *>;
215 std::vector<Rule> Rules;
237 friend struct Simplifier;
242 template <
typename FuncT>
void traverse(
Value *V, FuncT
F);
243 void record(
Value *V);
245 void unuse(
Value *V);
258 PE(
const Simplifier::Context &c,
Value *v =
nullptr) :
C(c),
V(
v) {}
260 const Simplifier::Context &
C;
266 P.C.print(
OS,
P.V ?
P.V :
P.C.Root);
272char HexagonLoopIdiomRecognizeLegacyPass::ID = 0;
275 "Recognize Hexagon-specific loop idioms",
false,
false)
286template <typename FuncT>
287void Simplifier::Context::traverse(
Value *V, FuncT
F) {
292 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
293 if (!U || U->getParent())
297 for (
Value *
Op : U->operands())
303 const auto *
U = dyn_cast<const Instruction>(V);
305 OS <<
V <<
'(' << *
V <<
')';
309 if (
U->getParent()) {
311 U->printAsOperand(
OS,
true);
316 unsigned N =
U->getNumOperands();
319 OS <<
U->getOpcodeName();
320 for (
const Value *
Op :
U->operands()) {
328void Simplifier::Context::initialize(
Instruction *Exp) {
338 Value *
V = Q.pop_front_val();
342 if (isa<PHINode>(U) ||
U->getParent() !=
Block)
346 M.insert({
U,
U->clone()});
350 for (std::pair<Value*,Value*>
P : M) {
352 for (
unsigned i = 0, n =
U->getNumOperands(); i != n; ++i) {
353 auto F =
M.find(
U->getOperand(i));
355 U->setOperand(i,
F->second);
359 auto R =
M.find(Exp);
367void Simplifier::Context::record(
Value *V) {
375void Simplifier::Context::use(
Value *V) {
383void Simplifier::Context::unuse(
Value *V) {
384 if (!isa<Instruction>(V) || cast<Instruction>(V)->
getParent() !=
nullptr)
405 Instruction *
U = dyn_cast<Instruction>(Q.pop_front_val());
407 if (!U ||
U->getParent())
409 for (
unsigned i = 0, n =
U->getNumOperands(); i != n; ++i) {
412 U->setOperand(i, NewV);
422void Simplifier::Context::replace(
Value *OldV,
Value *NewV) {
438 Value *
V = Q.pop_front_val();
440 if (!U ||
U->getParent())
444 NewV = subst(NewV, V, DupV);
452 Root = subst(Root, OldV, NewV);
456void Simplifier::Context::cleanup() {
457 for (
Value *V : Clones) {
460 U->dropAllReferences();
463 for (
Value *V : Clones) {
474 if (!
I->isSameOperationAs(J))
477 return I->isIdenticalTo(J);
479 for (
unsigned i = 0, n =
I->getNumOperands(); i != n; ++i) {
483 auto *InI = dyn_cast<const Instruction>(OpI);
484 auto *InJ = dyn_cast<const Instruction>(OpJ);
486 if (!
equal(InI, InJ))
488 }
else if (InI != InJ || !InI)
500 Value *
V = Q.pop_front_val();
504 if (!U ||
U->getParent())
506 if (SubI &&
equal(SubI, U))
525 I->insertInto(
B, At);
530 if (
Instruction *RootI = dyn_cast<Instruction>(Root))
535Value *Simplifier::simplify(Context &
C) {
542 if (Count++ >= Limit)
544 Instruction *
U = dyn_cast<Instruction>(Q.pop_front_val());
545 if (!U ||
U->getParent() || !
C.Used.count(U))
547 bool Changed =
false;
548 for (Rule &R : Rules) {
563 return Count < Limit ?
C.Root :
nullptr;
574 class PolynomialMultiplyRecognize {
576 explicit PolynomialMultiplyRecognize(
Loop *loop,
const DataLayout &dl,
579 : CurLoop(loop),
DL(dl), DT(dt), TLI(tli), SE(se) {}
587 LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext();
599 bool classifyInst(
Instruction *UseI, ValueSeq &Early, ValueSeq &Late);
601 bool highBitsAreZero(
Value *V,
unsigned IterCount);
602 bool keepsHighBitsZero(
Value *V,
unsigned IterCount);
608 struct ParsedValues {
609 ParsedValues() =
default;
617 unsigned IterCount = 0;
623 bool matchRightShift(
SelectInst *SelI, ParsedValues &PV);
625 Value *CIV, ParsedValues &PV,
bool PreScan);
626 unsigned getInverseMxN(
unsigned QP);
629 void setupPreSimplifier(Simplifier &S);
630 void setupPostSimplifier(Simplifier &S);
643 if (std::distance(PI, PE) != 2)
647 for (
auto I = BB->
begin(),
E = BB->
end();
I !=
E && isa<PHINode>(
I); ++
I) {
648 auto *PN = cast<PHINode>(
I);
649 Value *InitV = PN->getIncomingValueForBlock(
PB);
650 if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)->
isZero())
652 Value *IterV = PN->getIncomingValueForBlock(BB);
653 auto *BO = dyn_cast<BinaryOperator>(IterV);
656 if (BO->getOpcode() != Instruction::Add)
658 Value *IncV =
nullptr;
659 if (BO->getOperand(0) == PN)
660 IncV = BO->getOperand(1);
661 else if (BO->getOperand(1) == PN)
662 IncV = BO->getOperand(0);
666 if (
auto *
T = dyn_cast<ConstantInt>(IncV))
674 for (
auto UI =
I->user_begin(), UE =
I->user_end(); UI != UE;) {
675 Use &TheUse = UI.getUse();
677 if (
auto *II = dyn_cast<Instruction>(TheUse.
getUser()))
679 II->replaceUsesOfWith(
I, J);
683bool PolynomialMultiplyRecognize::matchLeftShift(
SelectInst *SelI,
684 Value *CIV, ParsedValues &PV) {
696 using namespace PatternMatch;
699 Value *
A =
nullptr, *
B =
nullptr, *
C =
nullptr;
709 Value *
X =
nullptr, *Sh1 =
nullptr;
737 Value *ShouldSameV =
nullptr, *ShouldXoredV =
nullptr;
740 ShouldXoredV = FalseV;
742 ShouldSameV = FalseV;
743 ShouldXoredV = TrueV;
746 Value *Q =
nullptr, *
R =
nullptr, *
Y =
nullptr, *
Z =
nullptr;
752 if (ShouldSameV ==
Y)
754 else if (ShouldSameV == Z)
798bool PolynomialMultiplyRecognize::matchRightShift(
SelectInst *SelI,
811 using namespace PatternMatch;
841 Value *
R =
nullptr, *Q =
nullptr;
873bool PolynomialMultiplyRecognize::scanSelect(
SelectInst *SelI,
876 using namespace PatternMatch;
915 if (matchLeftShift(SelI, CIV, PV)) {
921 auto *RPhi = dyn_cast<PHINode>(PV.R);
924 if (SelI != RPhi->getIncomingValueForBlock(LoopB))
930 if (CurLoop->isLoopInvariant(PV.X)) {
940 Value *Var =
nullptr, *Inv =
nullptr, *X1 =
nullptr, *X2 =
nullptr;
943 auto *
I1 = dyn_cast<Instruction>(X1);
944 auto *I2 = dyn_cast<Instruction>(X2);
945 if (!I1 ||
I1->getParent() != LoopB) {
948 }
else if (!I2 || I2->getParent() != LoopB) {
959 Value *EntryP = RPhi->getIncomingValueForBlock(PrehB);
966 if (matchRightShift(SelI, PV)) {
969 if (PV.Inv && !isa<ConstantInt>(PV.Q))
980bool PolynomialMultiplyRecognize::isPromotableTo(
Value *Val,
998 switch (
In->getOpcode()) {
999 case Instruction::PHI:
1000 case Instruction::ZExt:
1001 case Instruction::And:
1002 case Instruction::Or:
1003 case Instruction::Xor:
1004 case Instruction::LShr:
1005 case Instruction::Select:
1006 case Instruction::Trunc:
1008 case Instruction::ICmp:
1009 if (
CmpInst *CI = cast<CmpInst>(In))
1010 return CI->isEquality() || CI->isUnsigned();
1012 case Instruction::Add:
1013 return In->hasNoSignedWrap() &&
In->hasNoUnsignedWrap();
1018void PolynomialMultiplyRecognize::promoteTo(
Instruction *In,
1020 Type *OrigTy =
In->getType();
1024 if (!
In->getType()->isIntegerTy(1))
1025 In->mutateType(DestTy);
1029 if (
PHINode *
P = dyn_cast<PHINode>(In)) {
1030 unsigned N =
P->getNumIncomingValues();
1031 for (
unsigned i = 0; i !=
N; ++i) {
1035 Value *InV =
P->getIncomingValue(i);
1038 if (Ty !=
P->getType()) {
1043 P->setIncomingValue(i, InV);
1046 }
else if (
ZExtInst *Z = dyn_cast<ZExtInst>(In)) {
1048 if (
Op->getType() ==
Z->getType())
1049 Z->replaceAllUsesWith(
Op);
1050 Z->eraseFromParent();
1053 if (
TruncInst *
T = dyn_cast<TruncInst>(In)) {
1057 T->replaceAllUsesWith(
And);
1058 T->eraseFromParent();
1063 for (
unsigned i = 0, n =
In->getNumOperands(); i != n; ++i) {
1064 if (
ConstantInt *CI = dyn_cast<ConstantInt>(
In->getOperand(i)))
1065 if (CI->getType()->getBitWidth() < DestBW)
1070bool PolynomialMultiplyRecognize::promoteTypes(
BasicBlock *LoopB,
1084 if (
P.getNumIncomingValues() != 1)
1086 assert(
P.getIncomingBlock(0) == LoopB);
1088 if (!
T ||
T->getBitWidth() > DestBW)
1094 if (!
In.isTerminator() && !isPromotableTo(&In, DestTy))
1098 std::vector<Instruction*> LoopIns;
1099 std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns),
1102 if (!
In->isTerminator())
1103 promoteTo(In, DestTy, LoopB);
1112 Type *Ty0 =
P->getIncomingValue(0)->getType();
1113 Type *PTy =
P->getType();
1121 P->replaceAllUsesWith(
T);
1124 cast<Instruction>(
T)->setOperand(0,
P);
1131bool PolynomialMultiplyRecognize::findCycle(
Value *Out,
Value *In,
1137 auto *BB = cast<Instruction>(Out)->
getParent();
1138 bool HadPhi =
false;
1140 for (
auto *U : Out->
users()) {
1141 auto *
I = dyn_cast<Instruction>(&*U);
1142 if (
I ==
nullptr ||
I->getParent() != BB)
1149 bool IsPhi = isa<PHINode>(
I);
1150 if (IsPhi && HadPhi)
1155 if (findCycle(
I, In,
Cycle))
1159 return !
Cycle.empty();
1162void PolynomialMultiplyRecognize::classifyCycle(
Instruction *DivI,
1163 ValueSeq &
Cycle, ValueSeq &Early, ValueSeq &Late) {
1170 for (
I = 0;
I <
N; ++
I) {
1174 else if (!isa<PHINode>(V))
1181 ValueSeq &
First = !IsE ? Early : Late;
1182 for (
unsigned J = 0; J <
I; ++J)
1185 ValueSeq &Second = IsE ? Early : Late;
1187 for (++
I;
I <
N; ++
I) {
1189 if (DivI == V || isa<PHINode>(V))
1198bool PolynomialMultiplyRecognize::classifyInst(
Instruction *UseI,
1199 ValueSeq &Early, ValueSeq &Late) {
1203 if (UseI->
getOpcode() == Instruction::Select) {
1205 if (Early.count(TV) || Early.count(FV)) {
1206 if (Late.count(TV) || Late.count(FV))
1209 }
else if (Late.count(TV) || Late.count(FV)) {
1210 if (Early.count(TV) || Early.count(FV))
1222 bool AE =
true,
AL =
true;
1224 if (Early.count(&*
I))
1226 else if (Late.count(&*
I))
1250bool PolynomialMultiplyRecognize::commutesWithShift(
Instruction *
I) {
1251 switch (
I->getOpcode()) {
1252 case Instruction::And:
1253 case Instruction::Or:
1254 case Instruction::Xor:
1255 case Instruction::LShr:
1256 case Instruction::Shl:
1257 case Instruction::Select:
1258 case Instruction::ICmp:
1259 case Instruction::PHI:
1267bool PolynomialMultiplyRecognize::highBitsAreZero(
Value *V,
1268 unsigned IterCount) {
1269 auto *
T = dyn_cast<IntegerType>(
V->getType());
1275 return Known.countMinLeadingZeros() >= IterCount;
1278bool PolynomialMultiplyRecognize::keepsHighBitsZero(
Value *V,
1279 unsigned IterCount) {
1282 if (
auto *
C = dyn_cast<ConstantInt>(V))
1283 return C->getValue().countl_zero() >= IterCount;
1285 if (
auto *
I = dyn_cast<Instruction>(V)) {
1286 switch (
I->getOpcode()) {
1287 case Instruction::And:
1288 case Instruction::Or:
1289 case Instruction::Xor:
1290 case Instruction::LShr:
1291 case Instruction::Select:
1292 case Instruction::ICmp:
1293 case Instruction::PHI:
1294 case Instruction::ZExt:
1303 unsigned Opc =
I->getOpcode();
1304 if (Opc == Instruction::Shl || Opc == Instruction::LShr)
1305 return Op !=
I->getOperand(1);
1309bool PolynomialMultiplyRecognize::convertShiftsToLeft(
BasicBlock *LoopB,
1311 Value *CIV = getCountIV(LoopB);
1314 auto *CIVTy = dyn_cast<IntegerType>(CIV->
getType());
1315 if (CIVTy ==
nullptr)
1319 ValueSeq Early, Late, Cycled;
1323 using namespace PatternMatch;
1329 if (!findCycle(&
I, V,
C))
1334 classifyCycle(&
I,
C, Early, Late);
1335 Cycled.insert(
C.begin(),
C.end());
1341 ValueSeq
Users(Cycled.begin(), Cycled.end());
1342 for (
unsigned i = 0; i <
Users.size(); ++i) {
1344 if (!isa<IntegerType>(
V->getType()))
1346 auto *
R = cast<Instruction>(V);
1349 if (!commutesWithShift(R))
1351 for (
User *U :
R->users()) {
1352 auto *
T = cast<Instruction>(U);
1356 if (
T->getParent() != LoopB || RShifts.count(
T) || isa<PHINode>(
T))
1360 if (!classifyInst(
T, Early, Late))
1369 ValueSeq Internal(
Users.begin(),
Users.end());
1371 for (
unsigned i = 0; i < Internal.size(); ++i) {
1372 auto *
R = dyn_cast<Instruction>(Internal[i]);
1376 auto *
T = dyn_cast<Instruction>(
Op);
1377 if (
T &&
T->getParent() != LoopB)
1380 Internal.insert(
Op);
1383 for (
Value *V : Inputs)
1384 if (!highBitsAreZero(V, IterCount))
1386 for (
Value *V : Internal)
1387 if (!keepsHighBitsZero(V, IterCount))
1392 std::map<Value*,Value*> ShiftMap;
1394 using CastMapType = std::map<std::pair<Value *, Type *>,
Value *>;
1396 CastMapType CastMap;
1400 auto H = CM.find(std::make_pair(V, Ty));
1403 Value *CV = IRB.CreateIntCast(V, Ty,
false);
1404 CM.insert(std::make_pair(std::make_pair(V, Ty), CV));
1408 for (
auto I = LoopB->begin(),
E = LoopB->end();
I !=
E; ++
I) {
1409 using namespace PatternMatch;
1411 if (isa<PHINode>(
I) || !
Users.count(&*
I))
1422 for (
auto &J :
I->operands()) {
1424 if (!isOperandShifted(&*
I,
Op))
1429 if (isa<ConstantInt>(
Op) && cast<ConstantInt>(
Op)->
isZero())
1432 auto F = ShiftMap.find(
Op);
1433 Value *
W = (
F != ShiftMap.end()) ?
F->second :
nullptr;
1435 IRB.SetInsertPoint(&*
I);
1439 Value *ShAmt = CIV, *ShVal =
Op;
1440 auto *VTy = cast<IntegerType>(ShVal->getType());
1441 auto *ATy = cast<IntegerType>(ShAmt->
getType());
1442 if (Late.count(&*
I))
1447 if (VTy->getBitWidth() < ATy->getBitWidth())
1448 ShVal = upcast(CastMap, IRB, ShVal, ATy);
1450 ShAmt = upcast(CastMap, IRB, ShAmt, VTy);
1453 W = IRB.CreateShl(ShVal, ShAmt);
1454 ShiftMap.insert(std::make_pair(
Op, W));
1456 I->replaceUsesOfWith(
Op, W);
1466 for (
auto P = ExitB->
begin(), Q = ExitB->
end();
P != Q; ++
P) {
1467 if (!isa<PHINode>(
P))
1469 auto *PN = cast<PHINode>(
P);
1470 Value *
U = PN->getIncomingValueForBlock(LoopB);
1471 if (!
Users.count(U))
1479 cast<User>(S)->replaceUsesOfWith(S, PN);
1485void PolynomialMultiplyRecognize::cleanupLoopBody(
BasicBlock *LoopB) {
1486 for (
auto &
I : *LoopB)
1488 I.replaceAllUsesWith(SV);
1494unsigned PolynomialMultiplyRecognize::getInverseMxN(
unsigned QP) {
1497 std::array<char,32> Q,
C;
1499 for (
unsigned i = 0; i < 32; ++i) {
1516 for (
unsigned i = 1; i < 32; ++i) {
1524 for (
unsigned j = 0;
j < i; ++
j)
1525 T =
T ^ (
C[j] & Q[i-j]);
1530 for (
unsigned i = 0; i < 32; ++i)
1540 Module *
M = At->getParent()->getParent()->getParent();
1543 Value *
P = PV.P, *Q = PV.Q, *P0 =
P;
1544 unsigned IC = PV.IterCount;
1546 if (PV.M !=
nullptr)
1547 P0 =
P =
B.CreateXor(
P, PV.M);
1552 if (PV.IterCount != 32)
1553 P =
B.CreateAnd(
P, BMI);
1556 auto *QI = dyn_cast<ConstantInt>(PV.Q);
1557 assert(QI && QI->getBitWidth() <= 32);
1560 unsigned M = (1 << PV.IterCount) - 1;
1561 unsigned Tmp = (QI->getZExtValue() | 1) & M;
1562 unsigned QV = getInverseMxN(Tmp) &
M;
1564 P =
B.CreateCall(PMF, {
P, QVI});
1565 P =
B.CreateTrunc(
P, QI->getType());
1567 P =
B.CreateAnd(
P, BMI);
1570 Value *
R =
B.CreateCall(PMF, {
P, Q});
1572 if (PV.M !=
nullptr)
1573 R =
B.CreateXor(R,
B.CreateIntCast(P0,
R->getType(),
false));
1579 if (
const auto *CI = dyn_cast<const ConstantInt>(V))
1580 return (CI->getType()->getSignBit() & CI->getSExtValue()) == 0;
1584 switch (
I->getOpcode()) {
1585 case Instruction::LShr:
1586 if (
const auto SI = dyn_cast<const ConstantInt>(
I->getOperand(1)))
1587 return SI->getZExtValue() > 0;
1589 case Instruction::Or:
1590 case Instruction::Xor:
1593 case Instruction::And:
1600void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) {
1601 S.addRule(
"sink-zext",
1604 if (
I->getOpcode() != Instruction::ZExt)
1609 switch (
T->getOpcode()) {
1610 case Instruction::And:
1611 case Instruction::Or:
1612 case Instruction::Xor:
1618 return B.CreateBinOp(cast<BinaryOperator>(
T)->
getOpcode(),
1619 B.CreateZExt(
T->getOperand(0),
I->getType()),
1620 B.CreateZExt(
T->getOperand(1),
I->getType()));
1622 S.addRule(
"xor/and -> and/xor",
1625 if (
I->getOpcode() != Instruction::Xor)
1627 Instruction *And0 = dyn_cast<Instruction>(
I->getOperand(0));
1628 Instruction *And1 = dyn_cast<Instruction>(
I->getOperand(1));
1631 if (And0->getOpcode() != Instruction::And ||
1632 And1->getOpcode() != Instruction::And)
1634 if (And0->getOperand(1) != And1->getOperand(1))
1637 return B.CreateAnd(
B.CreateXor(And0->getOperand(0), And1->getOperand(0)),
1638 And0->getOperand(1));
1640 S.addRule(
"sink binop into select",
1650 Value *
X = Sel->getTrueValue(), *
Y = Sel->getFalseValue();
1652 return B.CreateSelect(Sel->getCondition(),
1653 B.CreateBinOp(
Op,
X, Z),
1654 B.CreateBinOp(
Op,
Y, Z));
1659 Value *
Y = Sel->getTrueValue(), *
Z = Sel->getFalseValue();
1660 return B.CreateSelect(Sel->getCondition(),
1661 B.CreateBinOp(
Op,
X,
Y),
1662 B.CreateBinOp(
Op,
X, Z));
1666 S.addRule(
"fold select-select",
1676 if (Sel0->getCondition() ==
C)
1680 if (Sel1->getCondition() ==
C)
1681 return B.CreateSelect(
C, Sel->
getTrueValue(), Sel1->getFalseValue());
1685 S.addRule(
"or-signbit -> xor-signbit",
1688 if (
I->getOpcode() != Instruction::Or)
1690 ConstantInt *Msb = dyn_cast<ConstantInt>(
I->getOperand(1));
1697 S.addRule(
"sink lshr into binop",
1700 if (
I->getOpcode() != Instruction::LShr)
1706 case Instruction::And:
1707 case Instruction::Or:
1708 case Instruction::Xor:
1714 Value *S =
I->getOperand(1);
1719 S.addRule(
"expose bitop-const",
1722 auto IsBitOp = [](
unsigned Op) ->
bool {
1724 case Instruction::And:
1725 case Instruction::Or:
1726 case Instruction::Xor:
1732 if (!BitOp1 || !IsBitOp(BitOp1->
getOpcode()))
1735 if (!BitOp2 || !IsBitOp(BitOp2->
getOpcode()))
1748void PolynomialMultiplyRecognize::setupPostSimplifier(Simplifier &S) {
1749 S.addRule(
"(and (xor (and x a) y) b) -> (and (xor x y) b), if b == b&a",
1751 if (
I->getOpcode() != Instruction::And)
1754 ConstantInt *C0 = dyn_cast<ConstantInt>(
I->getOperand(1));
1757 if (
Xor->getOpcode() != Instruction::Xor)
1762 if (!And0 || And0->getOpcode() != Instruction::And)
1764 ConstantInt *C1 = dyn_cast<ConstantInt>(And0->getOperand(1));
1769 if (V0 != (V0 & V1))
1772 return B.CreateAnd(
B.CreateXor(And0->getOperand(0), And1), C0);
1776bool PolynomialMultiplyRecognize::recognize() {
1777 LLVM_DEBUG(
dbgs() <<
"Starting PolynomialMultiplyRecognize on loop\n"
1778 << *CurLoop <<
'\n');
1787 if (LoopB != CurLoop->getLoopLatch())
1790 if (ExitB ==
nullptr)
1792 BasicBlock *EntryB = CurLoop->getLoopPreheader();
1793 if (EntryB ==
nullptr)
1796 unsigned IterCount = 0;
1797 const SCEV *CT = SE.getBackedgeTakenCount(CurLoop);
1798 if (isa<SCEVCouldNotCompute>(CT))
1800 if (
auto *CV = dyn_cast<SCEVConstant>(CT))
1801 IterCount = CV->getValue()->getZExtValue() + 1;
1803 Value *CIV = getCountIV(LoopB);
1806 PV.IterCount = IterCount;
1807 LLVM_DEBUG(
dbgs() <<
"Loop IV: " << *CIV <<
"\nIterCount: " << IterCount
1810 setupPreSimplifier(PreSimp);
1818 bool FoundPreScan =
false;
1819 auto FeedsPHI = [LoopB](
const Value *
V) ->
bool {
1820 for (
const Value *U :
V->users()) {
1821 if (
const auto *
P = dyn_cast<const PHINode>(U))
1822 if (
P->getParent() == LoopB)
1829 if (!SI || !FeedsPHI(SI))
1832 Simplifier::Context
C(SI);
1833 Value *
T = PreSimp.simplify(
C);
1834 SelectInst *SelI = (
T && isa<SelectInst>(
T)) ? cast<SelectInst>(
T) :
SI;
1835 LLVM_DEBUG(
dbgs() <<
"scanSelect(pre-scan): " << PE(
C, SelI) <<
'\n');
1836 if (scanSelect(SelI, LoopB, EntryB, CIV, PV,
true)) {
1837 FoundPreScan =
true;
1839 Value *NewSel =
C.materialize(LoopB,
SI->getIterator());
1840 SI->replaceAllUsesWith(NewSel);
1847 if (!FoundPreScan) {
1857 if (!promoteTypes(LoopB, ExitB))
1860 Simplifier PostSimp;
1861 setupPostSimplifier(PostSimp);
1864 if (!SI || !FeedsPHI(SI))
1866 Simplifier::Context
C(SI);
1867 Value *
T = PostSimp.simplify(
C);
1868 SelectInst *SelI = dyn_cast_or_null<SelectInst>(
T);
1870 Value *NewSel =
C.materialize(LoopB,
SI->getIterator());
1871 SI->replaceAllUsesWith(NewSel);
1877 if (!convertShiftsToLeft(LoopB, ExitB, IterCount))
1879 cleanupLoopBody(LoopB);
1883 bool FoundScan =
false;
1885 SelectInst *SelI = dyn_cast<SelectInst>(&In);
1889 FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV,
false);
1898 dbgs() <<
"Found pmpy idiom: R = " << PP <<
".Q\n";
1900 dbgs() <<
"Found inverse pmpy idiom: R = (" << PP <<
"/Q).Q) + "
1902 dbgs() <<
" Res:" << *PV.Res <<
"\n P:" << *PV.P <<
"\n";
1904 dbgs() <<
" M:" << *PV.M <<
"\n";
1905 dbgs() <<
" Q:" << *PV.Q <<
"\n";
1906 dbgs() <<
" Iteration count:" << PV.IterCount <<
"\n";
1910 Value *PM = generate(At, PV);
1914 if (PM->
getType() != PV.Res->getType())
1918 PV.Res->eraseFromParent();
1922int HexagonLoopIdiomRecognize::getSCEVStride(
const SCEVAddRecExpr *S) {
1924 return SC->getAPInt().getSExtValue();
1928bool HexagonLoopIdiomRecognize::isLegalStore(
Loop *CurLoop,
StoreInst *SI) {
1933 Value *StoredVal =
SI->getValueOperand();
1934 Value *StorePtr =
SI->getPointerOperand();
1938 if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
1944 auto *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
1945 if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
1950 int Stride = getSCEVStride(StoreEv);
1953 unsigned StoreSize =
DL->getTypeStoreSize(
SI->getValueOperand()->getType());
1954 if (StoreSize !=
unsigned(std::abs(Stride)))
1958 LoadInst *LI = dyn_cast<LoadInst>(
SI->getValueOperand());
1966 auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
1967 if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
1971 if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
1983 const SCEV *BECount,
unsigned StoreSize,
1993 if (
const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
2003 for (
auto *
B : L->blocks())
2005 if (Ignored.
count(&
I) == 0 &&
2012void HexagonLoopIdiomRecognize::collectStores(
Loop *CurLoop,
BasicBlock *BB,
2017 if (isLegalStore(CurLoop, SI))
2021bool HexagonLoopIdiomRecognize::processCopyingStore(
Loop *CurLoop,
2024 "Expected only non-volatile stores, or Hexagon-specific memcpy"
2025 "to volatile destination.");
2027 Value *StorePtr =
SI->getPointerOperand();
2028 auto *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
2029 unsigned Stride = getSCEVStride(StoreEv);
2030 unsigned StoreSize =
DL->getTypeStoreSize(
SI->getValueOperand()->getType());
2031 if (Stride != StoreSize)
2037 auto *LI = cast<LoadInst>(
SI->getValueOperand());
2048 Type *IntPtrTy =
Builder.getIntPtrTy(*
DL,
SI->getPointerAddressSpace());
2056 Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(),
2057 Builder.getPtrTy(
SI->getPointerAddressSpace()), ExpPt);
2058 Value *LoadBasePtr =
nullptr;
2060 bool Overlap =
false;
2061 bool DestVolatile =
SI->isVolatile();
2067 if (StoreSize != 4 ||
DL->getTypeSizeInBits(BECountTy) > 32) {
2071 if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) {
2073 StoreBasePtr =
nullptr;
2077 LoadBasePtr =
nullptr;
2086 StoreSize, *AA, Ignore1)) {
2090 BECount, StoreSize, *AA, Ignore1)) {
2092 goto CleanupAndExit;
2100 goto CleanupAndExit;
2105 if (
Func->hasFnAttribute(Attribute::AlwaysInline))
2106 goto CleanupAndExit;
2115 if (!coverLoop(CurLoop, Insts))
2116 goto CleanupAndExit;
2119 goto CleanupAndExit;
2122 goto CleanupAndExit;
2127 LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(),
2133 StoreSize, *AA, Ignore2))
2134 goto CleanupAndExit;
2137 bool StridePos = getSCEVStride(LoadEv) >= 0;
2140 if (!StridePos && DestVolatile)
2141 goto CleanupAndExit;
2143 bool RuntimeCheck = (Overlap || DestVolatile);
2150 if (ExitBlocks.
size() != 1)
2151 goto CleanupAndExit;
2152 ExitB = ExitBlocks[0];
2158 BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
2161 const SCEV *NumBytesS =
2162 SE->getAddExpr(BECount, SE->getOne(IntPtrTy),
SCEV::FlagNUW);
2164 NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
2166 Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt);
2167 if (
Instruction *In = dyn_cast<Instruction>(NumBytes))
2175 if (
ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) {
2177 if (Threshold != 0 &&
C < Threshold)
2178 goto CleanupAndExit;
2180 goto CleanupAndExit;
2185 Loop *ParentL = LF->getLoopFor(Preheader);
2186 StringRef HeaderName = Header->getName();
2195 for (
auto &In : *Header) {
2196 PHINode *PN = dyn_cast<PHINode>(&In);
2203 DT->addNewBlock(NewPreheader, Preheader);
2204 DT->changeImmediateDominator(Header, NewPreheader);
2212 Value *LA =
Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy);
2213 Value *SA =
Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy);
2214 Value *LowA = StridePos ? SA : LA;
2215 Value *HighA = StridePos ? LA : SA;
2226 if (Threshold != 0) {
2234 Func, NewPreheader);
2238 Builder.CreateCondBr(
Cond, MemmoveB, NewPreheader);
2241 DT->addNewBlock(MemmoveB, Preheader);
2245 ExitD = DT->findNearestCommonDominator(ExitD,
PB);
2253 if (ExitD && DT->dominates(Preheader, ExitD)) {
2261 CondBuilder.CreateBr(ExitB);
2266 Type *PtrTy = PointerType::get(Ctx, 0);
2273 const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount,
Int32Ty);
2275 Value *NumWords = Expander.expandCodeFor(NumWordsS,
Int32Ty,
2277 if (
Instruction *In = dyn_cast<Instruction>(NumWords))
2281 NewCall = CondBuilder.CreateCall(Fn,
2282 {StoreBasePtr, LoadBasePtr, NumWords});
2284 NewCall = CondBuilder.CreateMemMove(
2285 StoreBasePtr,
SI->getAlign(), LoadBasePtr, LI->
getAlign(), NumBytes);
2288 NewCall =
Builder.CreateMemCpy(StoreBasePtr,
SI->getAlign(), LoadBasePtr,
2297 LLVM_DEBUG(
dbgs() <<
" Formed " << (Overlap ?
"memmove: " :
"memcpy: ")
2299 <<
" from load ptr=" << *LoadEv <<
" at: " << *LI <<
"\n"
2300 <<
" from store ptr=" << *StoreEv <<
" at: " << *SI
2309bool HexagonLoopIdiomRecognize::coverLoop(
Loop *L,
2312 for (
auto *
B :
L->blocks())
2321 for (
unsigned i = 0; i < Worklist.size(); ++i) {
2323 for (
auto I =
In->op_begin(),
E =
In->op_end();
I !=
E; ++
I) {
2330 Worklist.insert(OpI);
2338 for (
auto *
B :
L->blocks()) {
2339 for (
auto &In : *
B) {
2340 if (isa<BranchInst>(In) || isa<DbgInfoIntrinsic>(In))
2342 if (!Worklist.count(&In) &&
In.mayHaveSideEffects())
2344 for (
auto *K :
In.users()) {
2349 if (LF->getLoopFor(UseB) != L)
2361bool HexagonLoopIdiomRecognize::runOnLoopBlock(
Loop *CurLoop,
BasicBlock *BB,
2366 auto DominatedByBB = [
this,BB] (
BasicBlock *EB) ->
bool {
2367 return DT->dominates(BB, EB);
2369 if (!
all_of(ExitBlocks, DominatedByBB))
2372 bool MadeChange =
false;
2375 collectStores(CurLoop, BB, Stores);
2378 for (
auto &SI : Stores)
2379 MadeChange |= processCopyingStore(CurLoop, SI, BECount);
2384bool HexagonLoopIdiomRecognize::runOnCountableLoop(
Loop *L) {
2385 PolynomialMultiplyRecognize PMR(L, *
DL, *DT, *TLI, *SE);
2386 if (PMR.recognize())
2389 if (!HasMemcpy && !HasMemmove)
2392 const SCEV *BECount = SE->getBackedgeTakenCount(L);
2393 assert(!isa<SCEVCouldNotCompute>(BECount) &&
2394 "runOnCountableLoop() called on a loop without a predictable"
2395 "backedge-taken count");
2398 L->getUniqueExitBlocks(ExitBlocks);
2400 bool Changed =
false;
2403 for (
auto *BB :
L->getBlocks()) {
2405 if (LF->getLoopFor(BB) != L)
2407 Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks);
2413bool HexagonLoopIdiomRecognize::run(
Loop *L) {
2414 const Module &
M = *
L->getHeader()->getParent()->getParent();
2420 if (!
L->getLoopPreheader())
2425 if (
Name ==
"memset" ||
Name ==
"memcpy" ||
Name ==
"memmove")
2428 DL = &
L->getHeader()->getModule()->getDataLayout();
2430 HasMemcpy = TLI->has(LibFunc_memcpy);
2431 HasMemmove = TLI->has(LibFunc_memmove);
2433 if (SE->hasLoopInvariantBackedgeTakenCount(L))
2434 return runOnCountableLoop(L);
2438bool HexagonLoopIdiomRecognizeLegacyPass::runOnLoop(
Loop *L,
2443 auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2444 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2445 auto *LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2446 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
2447 *
L->getHeader()->getParent());
2448 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2449 return HexagonLoopIdiomRecognize(AA, DT, LF, TLI, SE).run(L);
2453 return new HexagonLoopIdiomRecognizeLegacyPass();
2460 return HexagonLoopIdiomRecognize(&AR.
AA, &AR.
DT, &AR.
LI, &AR.
TLI, &AR.
SE)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements a class to represent arbitrary precision integral constant values and operations...
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static void cleanup(BlockFrequencyInfoImplBase &BFI)
Clear all memory not needed downstream.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_ATTRIBUTE_USED
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static cl::opt< unsigned > SimplifyLimit("hlir-simplify-limit", cl::init(10000), cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR"))
hexagon loop Recognize Hexagon specific loop idioms
static cl::opt< bool > DisableMemcpyIdiom("disable-memcpy-idiom", cl::Hidden, cl::init(false), cl::desc("Disable generation of memcpy in loop idiom recognition"))
static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB)
static cl::opt< unsigned > RuntimeMemSizeThreshold("runtime-mem-idiom-threshold", cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime " "check guarding the memmove."))
static cl::opt< bool > HexagonVolatileMemcpy("disable-hexagon-volatile-memcpy", cl::Hidden, cl::init(false), cl::desc("Enable Hexagon-specific memcpy for volatile destination."))
static cl::opt< bool > DisableMemmoveIdiom("disable-memmove-idiom", cl::Hidden, cl::init(false), cl::desc("Disable generation of memmove in loop idiom recognition"))
static cl::opt< unsigned > CompileTimeMemSizeThreshold("compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64), cl::desc("Threshold (in bytes) to perform the transformation, if the " "runtime loop count (mem transfer size) is known at compile-time."))
static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, const SCEV *BECount, unsigned StoreSize, AliasAnalysis &AA, SmallPtrSetImpl< Instruction * > &Ignored)
mayLoopAccessLocation - Return true if the specified loop might access the specified pointer location...
static const char * HexagonVolatileMemcpyName
static bool hasZeroSignBit(const Value *V)
static cl::opt< bool > OnlyNonNestedMemmove("only-nonnested-memmove-idiom", cl::Hidden, cl::init(true), cl::desc("Only enable generating memmove in non-nested loops"))
iv Induction Variable Users
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Move duplicate certain instructions close to their use
This header provides classes for managing per-loop analyses.
This file provides utility analysis objects describing memory locations.
Module.h This file contains the declarations for the Module class.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
A container for analyses that lazily runs them and caches their results.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
BinaryOps getOpcode() const
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
This is the shared class of boolean and integer constants.
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
void setIDom(DomTreeNodeBase *NewIDom)
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
A possibly irreducible generalization of a Loop.
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
const BasicBlock * getParent() const
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Class to represent integer types.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
uint64_t getSignBit() const
Return a uint64_t with just the most significant bit set (the sign bit, if the value is treated as a ...
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
This is an important class for using LLVM in a threaded context.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Value * getPointerOperand()
Align getAlign() const
Return the alignment of the access that is being performed.
static LocationSize precise(uint64_t Value)
static constexpr LocationSize afterPointer()
Any location after the base pointer (but still within the underlying object).
BlockT * getHeader() const
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all unique successor blocks of this loop.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
The legacy pass manager's analysis pass to compute loop information.
virtual bool runOnLoop(Loop *L, LPPassManager &LPM)=0
Represents a single loop in the control flow graph.
Representation for a specific memory location.
A Module instance is used to store all the information related to an LLVM module.
void setIncomingBlock(unsigned i, BasicBlock *BB)
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This node represents a polynomial recurrence on the trip count of the specified loop.
This class represents a constant integer value.
This class uses information about analyze scalars to rewrite expressions in canonical form.
const SCEV * getOperand(unsigned i) const
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
This class represents the LLVM 'select' instruction.
const Value * getFalseValue() const
const Value * getCondition() const
const Value * getTrueValue() const
A vector that has set insertion semantics.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Provides information about what library functions are available for the current target.
Triple - Helper class for working with autoconf configuration names.
ArchType getArch() const
Get the parsed architecture type of this triple.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
A Use represents the edge between a Value definition and its users.
User * getUser() const
Returns the User that contains this Use.
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
StringRef getName() const
Return a constant reference to the value's name.
This class represents zero extension of integer types.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
void link(std::unique_ptr< LinkGraph > G, std::unique_ptr< JITLinkContext > Ctx)
Link the given graph.
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Pass * createHexagonLoopIdiomPass()
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
Interval::pred_iterator pred_end(Interval *I)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
bool isModOrRefSet(const ModRefInfo MRI)
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
void replace(Container &Cont, typename Container::iterator ContIt, typename Container::iterator ContEnd, RandomAccessIterator ValIt, RandomAccessIterator ValEnd)
Given a sequence container Cont, replace the range [ContIt, ContEnd) with the range [ValIt,...
auto predecessors(const MachineBasicBlock *BB)
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...