#define DEBUG_TYPE "lower-matrix-intrinsics"

STATISTIC(FlattenedMatrices, "Number of matrix flattenings");
STATISTIC(ReshapedMatrices, "Number of matrix reshapes");

static cl::opt<bool>
    FuseMatrix("fuse-matrix", cl::init(true), cl::Hidden,
               cl::desc("Enable/disable fusing matrix instructions."));
static cl::opt<unsigned> TileSize(
    "fuse-matrix-tile-size", cl::init(4), cl::Hidden,
    cl::desc(
        "Tile size for matrix instruction fusion using square-shaped tiles."));
static cl::opt<bool> TileUseLoops("fuse-matrix-use-loops", cl::init(false),
                                  cl::Hidden,
                                  cl::desc("Generate loop nest for tiling."));
static cl::opt<bool> ForceFusion(
    "force-fuse-matrix", cl::init(false), cl::Hidden,
    cl::desc("Force matrix instruction fusion even if not profitable."));
static cl::opt<bool> AllowContractEnabled(
    "matrix-allow-contract", cl::init(false), cl::Hidden,
    cl::desc("Allow the use of FMAs if available and profitable. This may "
             "result in different results, due to less rounding error."));

static cl::opt<bool>
    VerifyShapeInfo("verify-matrix-shapes", cl::Hidden,
                    cl::desc("Enable/disable matrix shape verification."),
                    cl::init(false));

enum class MatrixLayoutTy { ColumnMajor, RowMajor };

static cl::opt<MatrixLayoutTy> MatrixLayout(
    "matrix-default-layout", cl::init(MatrixLayoutTy::ColumnMajor),
    cl::desc("Sets the default matrix layout"),
    cl::values(clEnumValN(MatrixLayoutTy::ColumnMajor, "column-major",
                          "Use column-major layout"),
               clEnumValN(MatrixLayoutTy::RowMajor, "row-major",
                          "Use row-major layout")));

static cl::opt<bool> PrintAfterTransposeOpt("matrix-print-after-transpose-opt",
                                            cl::init(false));
/// Return true if V is a splat of a value (which is used when multiplying a
/// matrix with a scalar).
static bool isSplat(Value *V) {
  if (auto *SV = dyn_cast<ShuffleVectorInst>(V))
    return SV->isZeroEltSplat();
  return false;
}

/// Match any mul operation (fp or integer).
template <typename LTy, typename RTy>
auto m_AnyMul(const LTy &L, const RTy &R) {
  return m_CombineOr(m_Mul(L, R), m_FMul(L, R));
}

/// Match any add operation (fp or integer).
template <typename LTy, typename RTy>
auto m_AnyAdd(const LTy &L, const RTy &R) {
  return m_CombineOr(m_Add(L, R), m_FAdd(L, R));
}
static Value *computeVectorAddr(Value *BasePtr, Value *VecIdx, Value *Stride,
                                unsigned NumElements, Type *EltType,
                                IRBuilder<> &Builder) {
  assert((!isa<ConstantInt>(Stride) ||
          cast<ConstantInt>(Stride)->getZExtValue() >= NumElements) &&
         "Stride must be >= the number of elements in the result vector.");

  // Compute the start of the vector with index VecIdx as VecIdx * Stride.
  Value *VecStart = Builder.CreateMul(VecIdx, Stride, "vec.start");

  // Get a pointer to the start of the selected vector.
  VecStart = Builder.CreateGEP(EltType, BasePtr, VecStart, "vec.gep");
  return VecStart;
}
/// ShapeInfo describes the shape of a matrix: the number of rows and columns,
/// plus the layout used while lowering.
struct ShapeInfo {
  unsigned NumRows;
  unsigned NumColumns;

  bool IsColumnMajor;

  ShapeInfo(unsigned NumRows = 0, unsigned NumColumns = 0)
      : NumRows(NumRows), NumColumns(NumColumns),
        IsColumnMajor(MatrixLayout == MatrixLayoutTy::ColumnMajor) {}

  ShapeInfo(Value *NumRows, Value *NumColumns)
      : ShapeInfo(cast<ConstantInt>(NumRows)->getZExtValue(),
                  cast<ConstantInt>(NumColumns)->getZExtValue()) {}

  bool operator==(const ShapeInfo &other) {
    return NumRows == other.NumRows && NumColumns == other.NumColumns;
  }
  bool operator!=(const ShapeInfo &other) { return !(*this == other); }

  /// Returns true if shape-information is defined, meaning both dimensions
  /// are != 0.
  operator bool() const {
    assert(NumRows == 0 || NumColumns != 0);
    return NumRows != 0;
  }

  /// Returns the distance in elements between consecutive vectors.
  unsigned getStride() const {
    if (IsColumnMajor)
      return NumRows;
    return NumColumns;
  }

  /// Returns the number of vectors the matrix is split into.
  unsigned getNumVectors() const {
    if (IsColumnMajor)
      return NumColumns;
    return NumRows;
  }

  /// Returns the transposed shape.
  ShapeInfo t() const { return ShapeInfo(NumColumns, NumRows); }

  friend raw_ostream &operator<<(raw_ostream &OS, ShapeInfo SI);
};

static raw_ostream &operator<<(raw_ostream &OS, ShapeInfo SI) {
  return OS << SI.NumRows << 'x' << SI.NumColumns;
}
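// Example (sketch): ShapeInfo(4, 2) with the default column-major layout
// describes a 4x2 matrix stored as 2 column vectors of 4 elements each, so
// getStride() == 4, getNumVectors() == 2, t() == ShapeInfo(2, 4), and the
// shape prints as "4x2".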
/// Return true if the instruction \p V propagates the shape of its first
/// shaped operand unchanged (element-wise operations and a small set of casts
/// and intrinsics).
static bool isUniformShape(Value *V) {
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return true;

  if (I->isBinaryOp())
    return true;

  if (auto *Cast = dyn_cast<CastInst>(V)) {
    switch (Cast->getOpcode()) {
    case llvm::Instruction::Trunc:
    case llvm::Instruction::ZExt:
    case llvm::Instruction::SExt:
    case llvm::Instruction::FPToUI:
    case llvm::Instruction::FPToSI:
    case llvm::Instruction::UIToFP:
    case llvm::Instruction::SIToFP:
    case llvm::Instruction::FPTrunc:
    case llvm::Instruction::FPExt:
      return true;
    case llvm::Instruction::AddrSpaceCast:
    case CastInst::PtrToAddr:
    case CastInst::PtrToInt:
    case CastInst::IntToPtr:
      return false;
    case CastInst::BitCast: {
      if (auto *SrcVTy = dyn_cast<FixedVectorType>(Cast->getSrcTy()))
        if (auto *DestVTy = dyn_cast<FixedVectorType>(Cast->getDestTy()))
          return SrcVTy->getNumElements() == DestVTy->getNumElements();
      return false;
    }
    case llvm::Instruction::CastOpsEnd:
      llvm_unreachable("not an actual cast op");
    }
    llvm_unreachable("unhandled cast opcode");
  }

  if (auto *II = dyn_cast<IntrinsicInst>(V))
    switch (II->getIntrinsicID()) {
    case Intrinsic::abs:
    case Intrinsic::fabs:
      return true;
    default:
      return false;
    }

  switch (I->getOpcode()) {
  case Instruction::PHI:
  case Instruction::FNeg:
    return true;
  default:
    return false;
  }
}
/// Return the ShapeInfo for the result of \p I, if it can be determined from
/// the shape intrinsics or from operands with known shape in \p ShapeMap.
static std::optional<ShapeInfo>
computeShapeInfoForInst(Instruction *I,
                        const DenseMap<Value *, ShapeInfo> &ShapeMap) {
  Value *M;
  Value *N;
  Value *K;
  if (match(I, m_Intrinsic<Intrinsic::matrix_multiply>(
                   m_Value(), m_Value(), m_Value(M), m_Value(N), m_Value(K))))
    return ShapeInfo(M, K);
  if (match(I, m_Intrinsic<Intrinsic::matrix_transpose>(m_Value(), m_Value(M),
                                                        m_Value(N))))
    // Flip dimensions.
    return ShapeInfo(N, M);
  if (match(I, m_Intrinsic<Intrinsic::matrix_column_major_store>(
                   m_Value(), m_Value(), m_Value(), m_Value(), m_Value(M),
                   m_Value(N))))
    return ShapeInfo(N, M);
  if (match(I, m_Intrinsic<Intrinsic::matrix_column_major_load>(
                   m_Value(), m_Value(), m_Value(), m_Value(M), m_Value(N))))
    return ShapeInfo(M, N);
  Value *MatrixA;
  if (match(I, m_Store(m_Value(MatrixA), m_Value()))) {
    auto OpShape = ShapeMap.find(MatrixA);
    if (OpShape != ShapeMap.end())
      return OpShape->second;
  }

  if (isUniformShape(I) || isa<SelectInst>(I)) {
    auto Ops = I->operands();
    auto ShapedOps = isa<SelectInst>(I) ? drop_begin(Ops) : Ops;
    // Find the first operand that has a known shape and use that.
    for (auto &Op : ShapedOps) {
      auto OpShape = ShapeMap.find(Op.get());
      if (OpShape != ShapeMap.end())
        return OpShape->second;
    }
  }
  return std::nullopt;
}
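// Example (sketch): for a call to llvm.matrix.transpose on a 2x3 matrix
// (shape arguments i32 2, i32 3), the computed result shape is 3x2, i.e. the
// dimensions are flipped. A plain binary operator, in contrast, inherits the
// shape of its first operand that is found in ShapeMap.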
class LowerMatrixIntrinsics {
  Function &Func;
  const DataLayout &DL;
  const TargetTransformInfo &TTI;
  FunctionAnalysisManager *AM;
  AliasAnalysis *AA = nullptr;
  DominatorTree *DT = nullptr;
  LoopInfo *LI = nullptr;
  OptimizationRemarkEmitter *ORE = nullptr;

  /// Estimates of the number of operations needed to lower a matrix value.
  struct OpInfoTy {
    /// Number of stores emitted to generate this matrix.
    unsigned NumStores = 0;
    /// Number of loads emitted to generate this matrix.
    unsigned NumLoads = 0;
    /// Number of compute operations emitted to generate this matrix.
    unsigned NumComputeOps = 0;
    /// Number of transposes we failed to fold away or fuse into multiplies.
    unsigned NumExposedTransposes = 0;

    OpInfoTy &operator+=(const OpInfoTy &RHS) {
      NumStores += RHS.NumStores;
      NumLoads += RHS.NumLoads;
      NumComputeOps += RHS.NumComputeOps;
      NumExposedTransposes += RHS.NumExposedTransposes;
      return *this;
    }
  };
  /// Wrapper class representing a matrix as a set of vectors, either in row
  /// or column major layout. All vectors must have the same vector type.
  class MatrixTy {
    SmallVector<Value *, 16> Vectors;

    OpInfoTy OpInfo;

    bool IsColumnMajor = true;

  public:
    MatrixTy() : IsColumnMajor(MatrixLayout == MatrixLayoutTy::ColumnMajor) {}
    MatrixTy(ArrayRef<Value *> Vectors)
        : Vectors(Vectors),
          IsColumnMajor(MatrixLayout == MatrixLayoutTy::ColumnMajor) {}
    MatrixTy(unsigned NumRows, unsigned NumColumns, Type *EltTy)
        : IsColumnMajor(MatrixLayout == MatrixLayoutTy::ColumnMajor) {
      unsigned D = isColumnMajor() ? NumColumns : NumRows;
      for (unsigned J = 0; J < D; ++J)
        addVector(PoisonValue::get(FixedVectorType::get(
            EltTy, isColumnMajor() ? NumRows : NumColumns)));
    }

    Value *getVector(unsigned i) const { return Vectors[i]; }
    Value *getColumn(unsigned i) const {
      assert(isColumnMajor() && "only supported for column-major matrixes");
      return Vectors[i];
    }
    Value *getRow(unsigned i) const {
      assert(!isColumnMajor() && "only supported for row-major matrixes");
      return Vectors[i];
    }

    void setVector(unsigned i, Value *V) { Vectors[i] = V; }

    Type *getElementType() const { return getVectorTy()->getElementType(); }

    unsigned getNumVectors() const {
      if (isColumnMajor())
        return getNumColumns();
      return getNumRows();
    }

    unsigned getNumColumns() const {
      if (isColumnMajor())
        return Vectors.size();
      assert(Vectors.size() > 0 && "Cannot call getNumRows without columns");
      return getVectorTy()->getNumElements();
    }
    unsigned getNumRows() const {
      if (isColumnMajor()) {
        assert(Vectors.size() > 0 && "Cannot call getNumRows without columns");
        return getVectorTy()->getNumElements();
      }
      return Vectors.size();
    }

    void addVector(Value *V) { Vectors.push_back(V); }
    FixedVectorType *getColumnTy() {
      assert(isColumnMajor() && "only supported for column-major matrixes");
      return getVectorTy();
    }

    FixedVectorType *getVectorTy() const {
      return cast<FixedVectorType>(Vectors[0]->getType());
    }

    iterator_range<SmallVector<Value *, 8>::iterator> columns() {
      assert(isColumnMajor() &&
             "columns() only supported for column-major matrixes");
      return make_range(Vectors.begin(), Vectors.end());
    }

    iterator_range<SmallVector<Value *, 8>::iterator> vectors() {
      return make_range(Vectors.begin(), Vectors.end());
    }

    /// Embed the matrix into a single flat vector by concatenating its
    /// vectors.
    Value *embedInVector(IRBuilder<> &Builder) const {
      return Vectors.size() == 1 ? Vectors[0]
                                 : concatenateVectors(Builder, Vectors);
    }

    MatrixTy &addNumLoads(unsigned N) {
      OpInfo.NumLoads += N;
      return *this;
    }

    void setNumLoads(unsigned N) { OpInfo.NumLoads = N; }

    MatrixTy &addNumStores(unsigned N) {
      OpInfo.NumStores += N;
      return *this;
    }

    MatrixTy &addNumExposedTransposes(unsigned N) {
      OpInfo.NumExposedTransposes += N;
      return *this;
    }

    MatrixTy &addNumComputeOps(unsigned N) {
      OpInfo.NumComputeOps += N;
      return *this;
    }

    unsigned getNumStores() const { return OpInfo.NumStores; }
    unsigned getNumLoads() const { return OpInfo.NumLoads; }
    unsigned getNumComputeOps() const { return OpInfo.NumComputeOps; }

    const OpInfoTy &getOpInfo() const { return OpInfo; }

    bool isColumnMajor() const { return IsColumnMajor; }

    unsigned getStride() const {
      if (isColumnMajor())
        return getNumRows();
      return getNumColumns();
    }

    ShapeInfo shape() const { return {getNumRows(), getNumColumns()}; }

    /// Extract a vector of \p NumElts starting at index (\p I, \p J). If the
    /// matrix is column-major, the result vector is extracted from a column
    /// vector, otherwise from a row vector.
    Value *extractVector(unsigned I, unsigned J, unsigned NumElts,
                         IRBuilder<> &Builder) const {
      Value *Vec = isColumnMajor() ? getColumn(J) : getRow(I);
      assert(cast<FixedVectorType>(Vec->getType())->getNumElements() >=
                 NumElts &&
             "Extracted vector will contain poison values");
      return Builder.CreateShuffleVector(
          Vec, createSequentialMask(isColumnMajor() ? I : J, NumElts, 0),
          "block");
    }
  };
  /// Maps instructions to their shape information. The shape information
  /// describes the shape to be used while lowering.
  DenseMap<Value *, ShapeInfo> ShapeMap;

  /// List of instructions to remove after lowering.
  SmallVector<Instruction *, 16> ToRemove;

  /// Map from instructions to their produced column matrix.
  MapVector<Value *, MatrixTy> Inst2ColumnMatrix;

  static FastMathFlags getFastMathFlags(Instruction *Inst) {
    FastMathFlags FMF;
    if (isa<FPMathOperator>(*Inst))
      FMF = Inst->getFastMathFlags();
    FMF.setAllowContract(AllowContractEnabled || FMF.allowContract());
    return FMF;
  }

public:
  LowerMatrixIntrinsics(Function &F, TargetTransformInfo &TTI,
                        FunctionAnalysisManager *AM)
      : Func(F), DL(F.getDataLayout()), TTI(TTI), AM(AM) {}
  unsigned getNumOps(Type *VT) {
    assert(isa<FixedVectorType>(VT) && "Expected vector type");
    return getNumOps(VT->getScalarType(),
                     cast<FixedVectorType>(VT)->getNumElements());
  }

  /// Is this the minimal version executed in the backend pipelines.
  bool isMinimal() const { return !DT; }

  /// Return the estimated number of vector ops required for an operation on
  /// \p ST with \p N elements.
  unsigned getNumOps(Type *ST, unsigned N) {
    return std::ceil((ST->getPrimitiveSizeInBits() * N).getFixedValue() /
                     double(TTI.getRegisterBitWidth(
                                TargetTransformInfo::RGK_FixedWidthVector)
                                .getFixedValue()));
  }
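  // Worked example (assuming 128-bit vector registers, purely for
  // illustration): for a <4 x double> column, getNumOps returns
  // ceil(4 * 64 / 128.0) == 2, i.e. the column is modelled as occupying two
  // vector registers; an operation touching 4 such columns counts as 8 ops.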
  /// Return the set of vectors that a matrix value is lowered to.
  ///
  /// If we lowered \p MatrixVal, just return the cached result. Otherwise,
  /// split the flat vector \p MatrixVal containing a matrix with shape \p SI
  /// into column (or row) vectors.
  MatrixTy getMatrix(Value *MatrixVal, const ShapeInfo &SI,
                     IRBuilder<> &Builder) {
    VectorType *VType = cast<VectorType>(MatrixVal->getType());
    assert(cast<FixedVectorType>(VType)->getNumElements() ==
               SI.NumRows * SI.NumColumns &&
           "The vector size must match the number of matrix elements");

    // Check if we lowered MatrixVal using shape information. In that case,
    // return the existing matrix if it matches the requested shape
    // information. On a mismatch, embed the result in a flat vector and split
    // it later.
    auto Found = Inst2ColumnMatrix.find(MatrixVal);
    if (Found != Inst2ColumnMatrix.end()) {
      MatrixTy &M = Found->second;
      // Return the found matrix, if its shape matches the requested shape
      // information.
      if (SI.NumRows == M.getNumRows() && SI.NumColumns == M.getNumColumns())
        return M;

      MatrixVal = M.embedInVector(Builder);
    }

    // Otherwise split MatrixVal.
    SmallVector<Value *, 16> SplitVecs;
    for (unsigned MaskStart = 0;
         MaskStart < cast<FixedVectorType>(VType)->getNumElements();
         MaskStart += SI.getStride()) {
      Value *V = Builder.CreateShuffleVector(
          MatrixVal, createSequentialMask(MaskStart, SI.getStride(), 0),
          "split");
      SplitVecs.push_back(V);
    }

    LLVM_DEBUG(if (Instruction *Inst = dyn_cast<Instruction>(MatrixVal)) {
      if (Found != Inst2ColumnMatrix.end()) {
        dbgs() << "matrix reshape from " << Found->second.shape() << " to "
               << SI << " using at least " << SplitVecs.size()
               << " shuffles on behalf of:\n"
               << *Inst << '\n';
        ReshapedMatrices++;
      } else if (!ShapeMap.contains(MatrixVal)) {
        dbgs() << "splitting a " << SI << " matrix with " << SplitVecs.size()
               << " shuffles because we do not have a shape-aware lowering for "
                  "its def:\n"
               << *Inst << '\n';
      }
    });

    return {SplitVecs};
  }
  /// If \p V is a matrix value, try to add its shape to ShapeMap. Returns true
  /// if a new entry was added.
  bool setShapeInfo(Value *V, ShapeInfo Shape) {
    assert(Shape && "Shape not set");
    if (isa<Constant>(V))
      return false;

    auto SIter = ShapeMap.find(V);
    if (SIter != ShapeMap.end()) {
      if (VerifyShapeInfo && (SIter->second.NumRows != Shape.NumRows ||
                              SIter->second.NumColumns != Shape.NumColumns)) {
        errs() << "Conflicting shapes (" << SIter->second.NumRows << "x"
               << SIter->second.NumColumns << " vs " << Shape.NumRows << "x"
               << Shape.NumColumns << ") for " << *V << "\n";
        report_fatal_error(
            "Matrix shape verification failed, compilation aborted!");
      }

      LLVM_DEBUG(dbgs() << "  not overriding existing shape: "
                        << SIter->second.NumRows << " "
                        << SIter->second.NumColumns << " for " << *V << "\n");
      return false;
    }

    ShapeMap.insert({V, Shape});
    LLVM_DEBUG(dbgs() << "  " << Shape.NumRows << " x " << Shape.NumColumns
                      << " for " << *V << "\n");
    return true;
  }
  /// Returns true if shape information can be used for \p V. The supported
  /// instructions must match the instructions that can be lowered by this
  /// pass.
  bool supportsShapeInfo(Value *V) {
    Instruction *Inst = dyn_cast<Instruction>(V);
    if (!Inst)
      return false;

    IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
    if (II)
      switch (II->getIntrinsicID()) {
      case Intrinsic::matrix_multiply:
      case Intrinsic::matrix_transpose:
      case Intrinsic::matrix_column_major_load:
      case Intrinsic::matrix_column_major_store:
        return true;
      default:
        return isUniformShape(II);
      }
    return isUniformShape(V) || isa<StoreInst>(V) || isa<LoadInst>(V) ||
           isa<SelectInst>(V);
  }
  /// Propagate the shape information of instructions to their users.
  /// The work list contains instructions for which we can compute the shape,
  /// either based on the information provided by matrix intrinsics or known
  /// shapes of operands.
  SmallVector<Instruction *, 32>
  propagateShapeForward(SmallVectorImpl<Instruction *> &WorkList) {
    SmallVector<Instruction *, 32> NewWorkList;
    // Pop an element for which we guaranteed to have at least one of the
    // operand shapes. Add the shape for this and then add users to the work
    // list.
    LLVM_DEBUG(dbgs() << "Forward-propagate shapes:\n");
    while (!WorkList.empty()) {
      Instruction *Inst = WorkList.pop_back_val();

      // New entry, set the value and insert operands.
      bool Propagate = false;
      if (auto SI = computeShapeInfoForInst(Inst, ShapeMap))
        Propagate = setShapeInfo(Inst, *SI);

      if (Propagate) {
        NewWorkList.push_back(Inst);
        for (auto *User : Inst->users())
          if (ShapeMap.count(User) == 0)
            WorkList.push_back(cast<Instruction>(User));
      }
    }

    return NewWorkList;
  }
  /// Propagate the shape to operands of instructions with shape information.
  /// \p WorkList contains the instructions for which we already know the
  /// shape.
  SmallVector<Instruction *, 32>
  propagateShapeBackward(SmallVectorImpl<Instruction *> &WorkList) {
    SmallVector<Instruction *, 32> NewWorkList;

    auto pushInstruction = [](Value *V,
                              SmallVectorImpl<Instruction *> &WorkList) {
      Instruction *I = dyn_cast<Instruction>(V);
      if (I)
        WorkList.push_back(I);
    };

    // Pop an element with a known shape. Traverse the operands; if their
    // shape derives from the result shape and is unknown, add it and add them
    // to the worklist.
    LLVM_DEBUG(dbgs() << "Backward-propagate shapes:\n");
    while (!WorkList.empty()) {
      Value *V = WorkList.pop_back_val();

      size_t BeforeProcessingV = WorkList.size();
      if (!isa<Instruction>(V))
        continue;

      Value *MatrixA;
      Value *MatrixB;
      Value *M;
      Value *N;
      Value *K;
      if (match(V, m_Intrinsic<Intrinsic::matrix_multiply>(
                       m_Value(MatrixA), m_Value(MatrixB), m_Value(M),
                       m_Value(N), m_Value(K)))) {
        if (setShapeInfo(MatrixA, {M, N}))
          pushInstruction(MatrixA, WorkList);

        if (setShapeInfo(MatrixB, {N, K}))
          pushInstruction(MatrixB, WorkList);
      } else if (match(V, m_Intrinsic<Intrinsic::matrix_transpose>(
                              m_Value(MatrixA), m_Value(M), m_Value(N)))) {
        // Flip dimensions.
        if (setShapeInfo(MatrixA, {M, N}))
          pushInstruction(MatrixA, WorkList);
      } else if (match(V, m_Intrinsic<Intrinsic::matrix_column_major_store>(
                              m_Value(MatrixA), m_Value(), m_Value(),
                              m_Value(), m_Value(M), m_Value(N)))) {
        if (setShapeInfo(MatrixA, {M, N})) {
          pushInstruction(MatrixA, WorkList);
        }
      } else if (isa<LoadInst>(V) || isa<StoreInst>(V) ||
                 match(V, m_Intrinsic<Intrinsic::matrix_column_major_load>())) {
        // Nothing to do, no matrix input to propagate to.
      } else if (isUniformShape(V) || isa<SelectInst>(V)) {
        auto Ops = cast<Instruction>(V)->operands();
        auto ShapedOps = isa<SelectInst>(V) ? drop_begin(Ops) : Ops;
        // Propagate to all shaped operands.
        ShapeInfo Shape = ShapeMap[V];
        for (Use &U : ShapedOps) {
          if (setShapeInfo(U.get(), Shape))
            pushInstruction(U.get(), WorkList);
        }
      }
      // After we discovered new shape info for new instructions in the
      // worklist, we use their users as seeds for the next round of forward
      // propagation.
      for (size_t I = BeforeProcessingV; I != WorkList.size(); I++)
        for (User *U : WorkList[I]->users())
          if (isa<Instruction>(U) && V != U)
            NewWorkList.push_back(cast<Instruction>(U));
    }
    return NewWorkList;
  }
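  // Example (sketch): given
  //   %m = load <8 x double>, ...                  ; no shape yet
  //   %t = llvm.matrix.transpose(%m, i32 2, i32 4) ; seeds the worklist
  // forward propagation records 4x2 for %t from the intrinsic's shape
  // arguments, and backward propagation then assigns 2x4 to %m because it
  // feeds the transpose.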
  /// Create transposes of both operands and let \p Operation combine them,
  /// registering shape information for the newly created instructions.
  Instruction *distributeTransposes(
      Value *Op0, ShapeInfo Shape0, Value *Op1, ShapeInfo Shape1,
      MatrixBuilder &Builder,
      function_ref<Instruction *(Value *, ShapeInfo, Value *, ShapeInfo)>
          Operation) {
    Value *T0 = Builder.CreateMatrixTranspose(
        Op0, Shape0.NumRows, Shape0.NumColumns, Op0->getName() + "_t");
    // We are being run after shape propagation, so add shape info for newly
    // created instructions so that we lower them later.
    setShapeInfo(T0, Shape0.t());
    Value *T1 = Builder.CreateMatrixTranspose(
        Op1, Shape1.NumRows, Shape1.NumColumns, Op1->getName() + "_t");
    setShapeInfo(T1, Shape1.t());
    return Operation(T0, Shape0.t(), T1, Shape1.t());
  }
  void eraseFromParentAndRemoveFromShapeMap(Instruction *Inst) {
    ShapeMap.erase(Inst);
    Inst->eraseFromParent();
  }

  /// Erase \p V from its parent and move \p II forward to avoid invalidating
  /// the iterator.
  void eraseFromParentAndMove(Value *V, BasicBlock::reverse_iterator &II,
                              BasicBlock &BB) {
    auto *Inst = cast<Instruction>(V);
    // Still used, don't erase.
    if (!Inst->use_empty())
      return;
    if (II != BB.rend() && Inst == &*II)
      ++II;
    eraseFromParentAndRemoveFromShapeMap(Inst);
  }

  /// Add a new entry to ShapeMap for \p New with \p Old's shape info, erase
  /// the entry for \p Old and replace all uses of \p Old with \p New.
  void updateShapeAndReplaceAllUsesWith(Instruction &Old, Value *New) {
    // Remove Old from the ShapeMap first, otherwise RAUW would replace its
    // key; only add New if it supports shape info.
    auto S = ShapeMap.find(&Old);
    if (S != ShapeMap.end()) {
      ShapeMap.erase(S);
      if (supportsShapeInfo(New))
        ShapeMap.insert({New, S->second});
    }
    Old.replaceAllUsesWith(New);
  }
  /// Sink a top-level transpose inside matmuls and adds.
  /// This creates and erases instructions as needed, and returns the newly
  /// created instruction while updating the iterator to avoid invalidation. If
  /// this returns nullptr, no new instruction was created.
  Instruction *sinkTranspose(Instruction &I, BasicBlock::reverse_iterator &II,
                             bool &Changed) {
    BasicBlock &BB = *I.getParent();
    IRBuilder<> IB(&I);
    MatrixBuilder Builder(IB);

    Value *TA, *TAMA, *TAMB;
    ConstantInt *R, *K, *C;
    if (!match(&I, m_Intrinsic<Intrinsic::matrix_transpose>(
                       m_Value(TA), m_ConstantInt(R), m_ConstantInt(C))))
      return nullptr;

    // Transpose of a transpose is a nop when the shapes match.
    Value *TATA;
    if (match(TA, m_Intrinsic<Intrinsic::matrix_transpose>(m_Value(TATA)))) {
      updateShapeAndReplaceAllUsesWith(I, TATA);
      eraseFromParentAndMove(&I, II, BB);
      eraseFromParentAndMove(TA, II, BB);
      Changed = true;
      return nullptr;
    }

    // k^T -> k
    if (isSplat(TA)) {
      updateShapeAndReplaceAllUsesWith(I, TA);
      eraseFromParentAndMove(&I, II, BB);
      Changed = true;
      return nullptr;
    }

    // (A * B)^t -> B^t * A^t
    // RxK KxC      CxK   KxR
    if (match(TA, m_Intrinsic<Intrinsic::matrix_multiply>(
                      m_Value(TAMA), m_Value(TAMB), m_ConstantInt(R),
                      m_ConstantInt(K), m_ConstantInt(C)))) {
      auto NewInst = distributeTransposes(
          TAMB, {K, C}, TAMA, {R, K}, Builder,
          [&](Value *T0, ShapeInfo Shape0, Value *T1, ShapeInfo Shape1) {
            return Builder.CreateMatrixMultiply(T0, T1, Shape0.NumRows,
                                                Shape0.NumColumns,
                                                Shape1.NumColumns, "mmul");
          });
      updateShapeAndReplaceAllUsesWith(I, NewInst);
      eraseFromParentAndMove(&I, II, BB);
      eraseFromParentAndMove(TA, II, BB);
      Changed = true;
      return NewInst;
    }

    // Same as above, but with a multiply by a splatted scalar:
    // (A * k)^t -> A^t * k
    if (match(TA, m_AnyMul(m_Value(TAMA), m_Value(TAMB))) &&
        (isSplat(TAMA) || isSplat(TAMB))) {
      IRBuilder<> LocalBuilder(&I);
      // The transposed operand is of shape RxC; multiplication by a scalar
      // preserves the shape.
      auto NewInst = distributeTransposes(
          TAMA, {R, C}, TAMB, {R, C}, Builder,
          [&](Value *T0, ShapeInfo Shape0, Value *T1, ShapeInfo Shape1) {
            bool IsFP = I.getType()->isFPOrFPVectorTy();
            auto *Mul = IsFP ? LocalBuilder.CreateFMul(T0, T1, "mmul")
                             : LocalBuilder.CreateMul(T0, T1, "mmul");
            auto *Result = cast<Instruction>(Mul);
            setShapeInfo(Result, Shape0);
            return Result;
          });
      updateShapeAndReplaceAllUsesWith(I, NewInst);
      eraseFromParentAndMove(&I, II, BB);
      eraseFromParentAndMove(TA, II, BB);
      Changed = true;
      return NewInst;
    }

    // (A + B)^t -> A^t + B^t
    if (match(TA, m_AnyAdd(m_Value(TAMA), m_Value(TAMB)))) {
      IRBuilder<> LocalBuilder(&I);
      auto NewInst = distributeTransposes(
          TAMA, {R, C}, TAMB, {R, C}, Builder,
          [&](Value *T0, ShapeInfo Shape0, Value *T1, ShapeInfo Shape1) {
            bool IsFP = I.getType()->isFPOrFPVectorTy();
            auto *Add = IsFP ? LocalBuilder.CreateFAdd(T0, T1, "madd")
                             : LocalBuilder.CreateAdd(T0, T1, "madd");
            auto *Result = cast<Instruction>(Add);
            setShapeInfo(Result, Shape0);
            return Result;
          });
      updateShapeAndReplaceAllUsesWith(I, NewInst);
      eraseFromParentAndMove(&I, II, BB);
      eraseFromParentAndMove(TA, II, BB);
      Changed = true;
      return NewInst;
    }

    return nullptr;
  }
  /// Lift a transpose out of a binary op: A^t op B^t -> (A op B)^t.
  bool liftTranspose(Instruction &I) {
    // Erase dead instructions after lifting transposes from binops.
    auto CleanupBinOp = [this](Instruction &T, Value *A, Value *B) {
      if (T.use_empty())
        eraseFromParentAndRemoveFromShapeMap(&T);
      if (A->use_empty())
        eraseFromParentAndRemoveFromShapeMap(cast<Instruction>(A));
      if (A != B && B->use_empty())
        eraseFromParentAndRemoveFromShapeMap(cast<Instruction>(B));
    };

    Value *A, *B, *AT, *BT;
    ConstantInt *R, *K, *C;
    // A^t * B^t -> (B * A)^t
    if (match(&I, m_Intrinsic<Intrinsic::matrix_multiply>(
                      m_Value(A), m_Value(B), m_ConstantInt(R),
                      m_ConstantInt(K), m_ConstantInt(C))) &&
        match(A, m_Intrinsic<Intrinsic::matrix_transpose>(m_Value(AT))) &&
        match(B, m_Intrinsic<Intrinsic::matrix_transpose>(m_Value(BT)))) {
      IRBuilder<> IB(&I);
      MatrixBuilder Builder(IB);
      Value *M = Builder.CreateMatrixMultiply(
          BT, AT, C->getZExtValue(), K->getZExtValue(), R->getZExtValue());
      setShapeInfo(M, {C, R});
      Instruction *NewInst = Builder.CreateMatrixTranspose(
          M, C->getZExtValue(), R->getZExtValue());
      updateShapeAndReplaceAllUsesWith(I, NewInst);
      CleanupBinOp(I, A, B);
      return true;
    }
    // A^t + B^t -> (A + B)^t. Pick rows and columns from the first transpose.
    // If the shape of the second transpose is different, there is a shape
    // conflict which gets resolved by picking the shape of the first operand.
    else if (match(&I, m_FAdd(m_Value(A), m_Value(B))) &&
             match(A, m_Intrinsic<Intrinsic::matrix_transpose>(
                          m_Value(AT), m_ConstantInt(R), m_ConstantInt(C))) &&
             match(B, m_Intrinsic<Intrinsic::matrix_transpose>(m_Value(BT)))) {
      IRBuilder<> Builder(&I);
      auto *Add = Builder.CreateFAdd(AT, BT, "mfadd");
      MatrixBuilder MBuilder(Builder);
      Instruction *NewInst = MBuilder.CreateMatrixTranspose(
          Add, R->getZExtValue(), C->getZExtValue(), "mfadd_t");
      updateShapeAndReplaceAllUsesWith(I, NewInst);
      assert(computeShapeInfoForInst(NewInst, ShapeMap) ==
                 computeShapeInfoForInst(&I, ShapeMap) &&
             "Shape of new instruction doesn't match original shape.");
      CleanupBinOp(I, A, B);
      if (auto *AddI = dyn_cast<Instruction>(Add)) {
        setShapeInfo(AddI, {R, C});
        assert(
            computeShapeInfoForInst(AddI, ShapeMap).value_or(ShapeMap[AddI]) ==
                ShapeMap[AddI] &&
            "Shape of updated addition doesn't match cached shape.");
      }
      return true;
    }
    return false;
  }
  bool optimizeTransposes() {
    bool Changed = false;
    // First sink all transposes inside matmuls and adds, hoping that we end up
    // with NN, NT or TN variants.
    for (BasicBlock &BB : reverse(Func)) {
      for (auto II = BB.rbegin(); II != BB.rend();) {
        Instruction &I = *II;
        ++II; // We may remove I; by default continue on the next instruction.
        if (Instruction *NewInst = sinkTranspose(I, II, Changed))
          II = std::next(BasicBlock::reverse_iterator(NewInst));
      }
    }

    // Then lift transposes out of TT matmuls and TT adds, so they can fold
    // into a consuming multiply or add.
    for (BasicBlock &BB : Func)
      for (Instruction &I : make_early_inc_range(BB))
        Changed |= liftTranspose(I);
    return Changed;
  }
  bool Visit() {
    SmallVector<Instruction *, 32> WorkList;

    // Initially only the shape of matrix intrinsics is known.
    // Initialize the work list with ops carrying shape information.
    for (BasicBlock &BB : Func)
      for (Instruction &Inst : BB) {
        IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Inst);
        if (!II)
          continue;

        switch (II->getIntrinsicID()) {
        case Intrinsic::matrix_multiply:
        case Intrinsic::matrix_transpose:
        case Intrinsic::matrix_column_major_load:
        case Intrinsic::matrix_column_major_store:
          WorkList.push_back(&Inst);
          break;
        default:
          break;
        }
      }

    // Avoid unnecessary work if there are no matrix intrinsics in the
    // function.
    if (WorkList.empty())
      return false;

    if (AM) {
      ORE = &AM->getResult<OptimizationRemarkEmitterAnalysis>(Func);
      AA = &AM->getResult<AAManager>(Func);
      DT = &AM->getResult<DominatorTreeAnalysis>(Func);
      LI = &AM->getResult<LoopAnalysis>(Func);
    }

    // Propagate shapes until nothing changes any longer.
    while (!WorkList.empty()) {
      WorkList = propagateShapeForward(WorkList);
      WorkList = propagateShapeBackward(WorkList);
    }

    bool Changed = false;
    if (!isMinimal()) {
      Changed |= optimizeTransposes();
      if (PrintAfterTransposeOpt) {
        dbgs() << "Dump after matrix transpose optimization:\n";
        Func.print(dbgs());
      }
    }

    SmallVector<CallInst *, 16> MaybeFusableInsts;
    SmallVector<Instruction *, 16> MatrixInsts;
    SmallVector<IntrinsicInst *, 16> LifetimeEnds;

    // First, collect all instructions with shape information and candidates
    // for fusion (currently only matrix multiplies).
    ReversePostOrderTraversal<Function *> RPOT(&Func);
    for (auto *BB : RPOT)
      for (Instruction &I : *BB) {
        if (match(&I, m_Intrinsic<Intrinsic::lifetime_end>()))
          LifetimeEnds.push_back(cast<IntrinsicInst>(&I));
        if (!ShapeMap.contains(&I))
          continue;
        if (match(&I, m_Intrinsic<Intrinsic::matrix_multiply>()))
          MaybeFusableInsts.push_back(cast<CallInst>(&I));
        MatrixInsts.push_back(&I);
      }

    // Second, try to fuse candidates.
    SmallPtrSet<Instruction *, 16> FusedInsts;
    for (CallInst *CI : MaybeFusableInsts)
      lowerDotProduct(CI, FusedInsts, getFastMathFlags(CI));

    for (CallInst *CI : MaybeFusableInsts)
      if (!FusedInsts.contains(CI))
        LowerMatrixMultiplyFused(CI, FusedInsts, LifetimeEnds);

    Changed |= !FusedInsts.empty();

    // Third, lower remaining instructions with shape information. Precompute
    // the lowered PHI nodes first, so their incoming values can refer to them.
    for (Instruction *Inst : MatrixInsts) {
      if (FusedInsts.count(Inst))
        continue;
      auto *PHI = dyn_cast<PHINode>(Inst);
      if (!PHI)
        continue;

      const ShapeInfo &SI = ShapeMap.at(Inst);
      auto *EltTy = cast<FixedVectorType>(PHI->getType())->getElementType();
      MatrixTy PhiM(SI.NumRows, SI.NumColumns, EltTy);

      IRBuilder<> Builder(Inst);
      for (unsigned VI = 0, VE = PhiM.getNumVectors(); VI != VE; ++VI)
        PhiM.setVector(VI, Builder.CreatePHI(PhiM.getVectorTy(),
                                             PHI->getNumIncomingValues(),
                                             PHI->getName()));
      assert(!Inst2ColumnMatrix.contains(PHI) && "map already contains phi?");
      Inst2ColumnMatrix[PHI] = PhiM;
    }

    for (Instruction *Inst : MatrixInsts) {
      if (FusedInsts.count(Inst))
        continue;

      const ShapeInfo &SI = ShapeMap.at(Inst);

      MatrixTy Result;
      IRBuilder<> Builder(Inst);
      if (auto *BinOp = dyn_cast<BinaryOperator>(Inst))
        Result = VisitBinaryOperator(BinOp, SI, Builder);
      else if (auto *Cast = dyn_cast<CastInst>(Inst))
        Result = VisitCastInstruction(Cast, SI, Builder);
      else if (auto *UnOp = dyn_cast<UnaryOperator>(Inst))
        Result = VisitUnaryOperator(UnOp, SI, Builder);
      else if (auto *Intr = dyn_cast<IntrinsicInst>(Inst))
        Result = VisitIntrinsicInst(Intr, SI, Builder);
      // ... (selects, loads, stores and PHI nodes are dispatched to their
      // respective Visit* helpers in the same way)
      else
        continue;

      finalizeLowering(Inst, Result, Builder);
      Changed = true;
    }

    if (ORE) {
      RemarkGenerator RemarkGen(Inst2ColumnMatrix, *ORE, Func);
      RemarkGen.emitRemarks();
    }

    // Delete the instructions backwards, as that reduces the number of def-use
    // and use-def chains that need updating. Because we add to ToRemove during
    // fusion we cannot guarantee that defs come before uses, so temporarily
    // change remaining uses to poison and verify they all get removed.
    SmallPtrSet<Instruction *, 16> PoisonedInsts;
    for (auto *Inst : reverse(ToRemove)) {
      for (Use &U : llvm::make_early_inc_range(Inst->uses())) {
        if (auto *Poisoned = dyn_cast<Instruction>(U.getUser()))
          PoisonedInsts.insert(Poisoned);
        U.set(PoisonValue::get(Inst->getType()));
      }
      Inst->eraseFromParent();
      PoisonedInsts.erase(Inst);
    }
    if (!PoisonedInsts.empty()) {
      // If we did not remove all poisoned instructions, it is a hard error.
      dbgs() << "Poisoned but present instructions:\n";
      for (auto *I : PoisonedInsts)
        dbgs() << *I << "\n";
      llvm_unreachable("Poisoned but instruction not removed");
    }

    return Changed;
  }
  /// Replace intrinsic calls.
  MatrixTy VisitIntrinsicInst(IntrinsicInst *Inst, const ShapeInfo &SI,
                              IRBuilder<> &Builder) {
    switch (Inst->getIntrinsicID()) {
    case Intrinsic::matrix_multiply:
      return LowerMultiply(Inst, Builder);
    case Intrinsic::matrix_transpose:
      return LowerTranspose(Inst, Builder);
    case Intrinsic::matrix_column_major_load:
      return LowerColumnMajorLoad(Inst, Builder);
    case Intrinsic::matrix_column_major_store:
      return LowerColumnMajorStore(Inst, Builder);
    case Intrinsic::abs:
    case Intrinsic::fabs: {
      MatrixTy Result;
      MatrixTy M = getMatrix(Inst->getOperand(0), SI, Builder);
      Builder.setFastMathFlags(getFastMathFlags(Inst));

      for (auto *Vector : M.vectors()) {
        switch (Inst->getIntrinsicID()) {
        case Intrinsic::abs:
          Result.addVector(Builder.CreateBinaryIntrinsic(
              Intrinsic::abs, Vector, Inst->getOperand(1)));
          continue;
        case Intrinsic::fabs:
          Result.addVector(
              Builder.CreateUnaryIntrinsic(Inst->getIntrinsicID(), Vector));
          continue;
        default:
          llvm_unreachable("unexpected intrinsic");
        }
      }

      return Result.addNumComputeOps(getNumOps(Result.getVectorTy()) *
                                     Result.getNumVectors());
    }
    default:
      break;
    }
    llvm_unreachable(
        "only intrinsics supporting shape info should be seen here");
  }
  /// Compute the alignment for the column/row with index \p Idx, given a
  /// stride of \p Stride elements and an initial alignment of \p A.
  Align getAlignForIndex(unsigned Idx, Value *Stride, Type *ElementTy,
                         MaybeAlign A) const {
    Align InitialAlign = DL.getValueOrABITypeAlignment(A, ElementTy);
    if (Idx == 0)
      return InitialAlign;

    TypeSize ElementSizeInBits = DL.getTypeSizeInBits(ElementTy);
    if (auto *ConstStride = dyn_cast<ConstantInt>(Stride)) {
      uint64_t StrideInBytes =
          ConstStride->getZExtValue() * ElementSizeInBits / 8;
      return commonAlignment(InitialAlign, Idx * StrideInBytes);
    }
    return commonAlignment(InitialAlign, ElementSizeInBits / 8);
  }
  /// Load a matrix with \p Shape starting at \p Ptr, using \p Stride between
  /// vectors.
  MatrixTy loadMatrix(Type *Ty, Value *Ptr, MaybeAlign MAlign, Value *Stride,
                      bool IsVolatile, ShapeInfo Shape, IRBuilder<> &Builder) {
    auto *VType = cast<FixedVectorType>(Ty);
    Type *EltTy = VType->getElementType();
    Type *VecTy = FixedVectorType::get(EltTy, Shape.getStride());
    Value *EltPtr = Ptr;
    MatrixTy Result;
    for (unsigned I = 0, E = Shape.getNumVectors(); I < E; ++I) {
      Value *GEP = computeVectorAddr(
          EltPtr, Builder.getIntN(Stride->getType()->getScalarSizeInBits(), I),
          Stride, Shape.getStride(), EltTy, Builder);
      Value *Vector = Builder.CreateAlignedLoad(
          VecTy, GEP, getAlignForIndex(I, Stride, EltTy, MAlign),
          IsVolatile, "col.load");

      Result.addVector(Vector);
    }
    return Result.addNumLoads(getNumOps(Result.getVectorTy()) *
                              Result.getNumVectors());
  }
  /// Loads a sub-matrix with shape \p ResultShape from the matrix with shape
  /// \p MatrixShape at \p MatrixPtr, starting at offsets (\p I, \p J).
  MatrixTy loadMatrix(Value *MatrixPtr, MaybeAlign Align, bool IsVolatile,
                      ShapeInfo MatrixShape, Value *I, Value *J,
                      ShapeInfo ResultShape, Type *EltTy,
                      IRBuilder<> &Builder) {
    Value *Offset = Builder.CreateAdd(
        Builder.CreateMul(J, Builder.getInt64(MatrixShape.getStride())), I);

    Value *TileStart = Builder.CreateGEP(EltTy, MatrixPtr, Offset);
    auto *TileTy = FixedVectorType::get(EltTy, ResultShape.NumRows *
                                                   ResultShape.NumColumns);

    return loadMatrix(TileTy, TileStart, Align,
                      Builder.getInt64(MatrixShape.getStride()), IsVolatile,
                      ResultShape, Builder);
  }

  /// Lower a load instruction with shape information.
  MatrixTy LowerLoad(Instruction *Inst, Value *Ptr, MaybeAlign Align,
                     Value *Stride, bool IsVolatile, ShapeInfo Shape,
                     IRBuilder<> &Builder) {
    return loadMatrix(Inst->getType(), Ptr, Align, Stride, IsVolatile, Shape,
                      Builder);
  }

  /// Lowers llvm.matrix.column.major.load.
  MatrixTy LowerColumnMajorLoad(CallInst *Inst, IRBuilder<> &Builder) {
    assert(MatrixLayout == MatrixLayoutTy::ColumnMajor &&
           "Intrinsic only supports column-major layout!");
    Value *Ptr = Inst->getArgOperand(0);
    Value *Stride = Inst->getArgOperand(1);
    return LowerLoad(Inst, Ptr, Inst->getParamAlign(0), Stride,
                     cast<ConstantInt>(Inst->getArgOperand(2))->isOne(),
                     {Inst->getArgOperand(3), Inst->getArgOperand(4)}, Builder);
  }
  /// Stores a sub-matrix \p StoreVal into the matrix with shape \p MatrixShape
  /// at \p MatrixPtr, starting at offsets (\p I, \p J).
  void storeMatrix(const MatrixTy &StoreVal, Value *MatrixPtr,
                   MaybeAlign MAlign, bool IsVolatile, ShapeInfo MatrixShape,
                   Value *I, Value *J, Type *EltTy, IRBuilder<> &Builder) {
    Value *Offset = Builder.CreateAdd(
        Builder.CreateMul(J, Builder.getInt64(MatrixShape.getStride())), I);

    Value *TileStart = Builder.CreateGEP(EltTy, MatrixPtr, Offset);
    auto *TileTy = FixedVectorType::get(EltTy, StoreVal.getNumRows() *
                                                   StoreVal.getNumColumns());

    storeMatrix(TileTy, StoreVal, TileStart, MAlign,
                Builder.getInt64(MatrixShape.getStride()), IsVolatile, Builder);
  }

  /// Store matrix \p StoreVal starting at \p Ptr, using \p Stride between
  /// vectors.
  MatrixTy storeMatrix(Type *Ty, MatrixTy StoreVal, Value *Ptr,
                       MaybeAlign MAlign, Value *Stride, bool IsVolatile,
                       IRBuilder<> &Builder) {
    auto *VType = cast<FixedVectorType>(Ty);
    Value *EltPtr = Ptr;
    for (auto Vec : enumerate(StoreVal.vectors())) {
      Value *GEP = computeVectorAddr(
          EltPtr,
          Builder.getIntN(Stride->getType()->getScalarSizeInBits(),
                          Vec.index()),
          Stride, StoreVal.getStride(), VType->getElementType(), Builder);
      Builder.CreateAlignedStore(Vec.value(), GEP,
                                 getAlignForIndex(Vec.index(), Stride,
                                                  VType->getElementType(),
                                                  MAlign),
                                 IsVolatile);
    }
    return MatrixTy().addNumStores(getNumOps(StoreVal.getVectorTy()) *
                                   StoreVal.getNumVectors());
  }

  /// Lower a store instruction with shape information.
  MatrixTy LowerStore(Instruction *Inst, Value *Matrix, Value *Ptr,
                      MaybeAlign A, Value *Stride, bool IsVolatile,
                      ShapeInfo Shape, IRBuilder<> &Builder) {
    auto StoreVal = getMatrix(Matrix, Shape, Builder);
    return storeMatrix(Matrix->getType(), StoreVal, Ptr, A, Stride, IsVolatile,
                       Builder);
  }

  /// Lowers llvm.matrix.column.major.store.
  MatrixTy LowerColumnMajorStore(CallInst *Inst, IRBuilder<> &Builder) {
    assert(MatrixLayout == MatrixLayoutTy::ColumnMajor &&
           "Intrinsic only supports column-major layout!");
    Value *Matrix = Inst->getArgOperand(0);
    Value *Ptr = Inst->getArgOperand(1);
    Value *Stride = Inst->getArgOperand(2);
    return LowerStore(Inst, Matrix, Ptr, Inst->getParamAlign(1), Stride,
                      cast<ConstantInt>(Inst->getArgOperand(3))->isOne(),
                      {Inst->getArgOperand(4), Inst->getArgOperand(5)},
                      Builder);
  }
  /// Set elements I..I+NumElts-1 of \p Col to \p Block.
  Value *insertVector(Value *Col, unsigned I, Value *Block,
                      IRBuilder<> &Builder) {
    // First, bring Block to the same size as Col.
    unsigned BlockNumElts =
        cast<FixedVectorType>(Block->getType())->getNumElements();
    unsigned NumElts = cast<FixedVectorType>(Col->getType())->getNumElements();
    assert(NumElts >= BlockNumElts && "Too few elements for current block");

    Block = Builder.CreateShuffleVector(
        Block, createSequentialMask(0, BlockNumElts, NumElts - BlockNumElts));

    // If Col is 7 long, I is 2 and BlockNumElts is 2, the mask is:
    // 0, 1, 7, 8, 4, 5, 6
    SmallVector<int, 16> Mask;
    unsigned i;
    for (i = 0; i < I; i++)
      Mask.push_back(i);

    unsigned VecNumElts =
        cast<FixedVectorType>(Col->getType())->getNumElements();
    for (; i < I + BlockNumElts; i++)
      Mask.push_back(i - I + VecNumElts);

    for (; i < VecNumElts; i++)
      Mask.push_back(i);

    return Builder.CreateShuffleVector(Col, Block, Mask);
  }
  Value *createMulAdd(Value *Sum, Value *A, Value *B, bool UseFPOp,
                      IRBuilder<> &Builder, bool AllowContraction,
                      unsigned &NumComputeOps) {
    NumComputeOps += getNumOps(A->getType());
    if (!Sum)
      return UseFPOp ? Builder.CreateFMul(A, B) : Builder.CreateMul(A, B);

    if (UseFPOp) {
      if (AllowContraction) {
        // Use fmuladd for floating point operations and let the backend decide
        // if that is profitable.
        return Builder.CreateIntrinsic(Intrinsic::fmuladd, A->getType(),
                                       {A, B, Sum});
      }
      NumComputeOps += getNumOps(A->getType());
      Value *Mul = Builder.CreateFMul(A, B);
      return Builder.CreateFAdd(Sum, Mul);
    }

    NumComputeOps += getNumOps(A->getType());
    Value *Mul = Builder.CreateMul(A, B);
    return Builder.CreateAdd(Sum, Mul);
  }
  /// Cache \p Matrix as the result of \p Inst and update the uses of \p Inst.
  /// Users with shape information keep using the cached value; for all other
  /// users, \p Matrix is flattened and the uses updated. Also marks \p Inst
  /// for deletion.
  void finalizeLowering(Instruction *Inst, MatrixTy Matrix,
                        IRBuilder<> &Builder) {
    auto inserted = Inst2ColumnMatrix.insert(std::make_pair(Inst, Matrix));
    (void)inserted;
    assert(inserted.second && "multiple matrix lowering mapping");

    ToRemove.push_back(Inst);
    Value *Flattened = nullptr;
    for (Use &U : llvm::make_early_inc_range(Inst->uses())) {
      if (ShapeMap.contains(U.getUser()))
        continue;

      if (!Flattened) {
        Flattened = Matrix.embedInVector(Builder);
        LLVM_DEBUG(
            if (Instruction *User = dyn_cast<Instruction>(U.getUser())) dbgs()
                << "flattening a " << Matrix.shape() << " matrix:\n"
                << *Inst
                << "\nbecause we do not have a shape-aware lowering for its "
                   "user:\n"
                << *User << '\n';);
        FlattenedMatrices++;
      }
      U.set(Flattened);
    }
  }
  /// Special case for MatMul lowering: if the result is a 1x1 matrix (a dot
  /// product), lower the multiply to a single elementwise multiply followed by
  /// a vector reduction, avoiding scalar loads of row-major vectors.
  void lowerDotProduct(CallInst *MatMul,
                       SmallPtrSet<Instruction *, 16> &FusedInsts,
                       FastMathFlags FMF) {
    if (FusedInsts.contains(MatMul) ||
        MatrixLayout != MatrixLayoutTy::ColumnMajor)
      return;
    ShapeInfo LShape(MatMul->getArgOperand(2), MatMul->getArgOperand(3));
    ShapeInfo RShape(MatMul->getArgOperand(3), MatMul->getArgOperand(4));

    if (LShape.NumRows != 1 || RShape.NumColumns != 1) // not a dot product
      return;

    Value *LHS = MatMul->getArgOperand(0);
    Value *RHS = MatMul->getArgOperand(1);

    Type *ElementType = cast<VectorType>(LHS->getType())->getElementType();
    bool IsIntVec = ElementType->isIntegerTy();

    // Floating point reductions require reassociation.
    if (!IsIntVec && !FMF.allowReassoc())
      return;

    auto CanBeFlattened = [](Value *Op) {
      if (match(Op, m_BinOp()))
        return true;
      return match(
          Op, m_OneUse(m_CombineOr(
                  m_Load(m_Value()),
                  m_CombineOr(m_Intrinsic<Intrinsic::matrix_transpose>(),
                              m_Intrinsic<Intrinsic::matrix_column_major_load>(
                                  m_Value(), m_SpecificInt(1))))));
    };
    // Returns the cost benefit of using \p Op with the dot product lowering.
    // If the returned cost is < 0, the argument is cheaper to use in the dot
    // product lowering.
    auto GetCostForArg = [this, &CanBeFlattened](Value *Op, unsigned N) {
      if (!ShapeMap.contains(Op))
        return InstructionCost::getInvalid();

      if (!isa<Instruction>(Op))
        return InstructionCost(0);

      FixedVectorType *VecTy = cast<FixedVectorType>(Op->getType());
      Type *EltTy = VecTy->getElementType();

      if (!CanBeFlattened(Op)) {
        // Operands that cannot be flattened pay for embedding their N columns
        // into a flat vector with N - 1 shuffles.
        InstructionCost EmbedCost(0);
        for (unsigned I = 1; I < N; ++I)
          EmbedCost += TTI.getShuffleCost(/*...*/);
        return EmbedCost;
      }

      if (match(Op, m_BinOp()) && ShapeMap.contains(Op)) {
        // Binary operators pay the difference between one vector op and N
        // scalar ops.
        InstructionCost OriginalCost =
            TTI.getArithmeticInstrCost(cast<Instruction>(Op)->getOpcode(),
                                       EltTy) *
            N;
        InstructionCost NewCost = TTI.getArithmeticInstrCost(
            cast<Instruction>(Op)->getOpcode(), VecTy);
        return NewCost - OriginalCost;
      }

      if (match(Op, m_Intrinsic<Intrinsic::matrix_transpose>())) {
        // The transpose can be skipped for the dot product lowering, saving
        // the embedding shuffles.
        InstructionCost EmbedCost(0);
        for (unsigned I = 1; I < N; ++I)
          EmbedCost -= TTI.getShuffleCost(/*...*/);
        return EmbedCost;
      }

      // Costs for loads: one wide vector load vs. N scalar loads.
      if (N == 1)
        return InstructionCost(0);

      return TTI.getMemoryOpCost(Instruction::Load, VecTy, Align(1), 0) -
             N * TTI.getMemoryOpCost(Instruction::Load, EltTy, Align(1), 0);
    };

    // Iterate over LHS and the operations feeding it and check if it is
    // profitable to flatten the visited ops.
    SmallPtrSet<Value *, 4> Seen;
    SmallVector<Value *> WorkList;
    SmallVector<Value *> ToFlatten;
    WorkList.push_back(LHS);
    InstructionCost LHSCost(0);
    while (!WorkList.empty()) {
      Value *Op = WorkList.pop_back_val();
      if (!Seen.insert(Op).second)
        continue;

      InstructionCost OpCost = GetCostForArg(Op, LShape.NumColumns);
      if (OpCost + LHSCost >= LHSCost)
        continue;

      LHSCost += OpCost;
      ToFlatten.push_back(Op);
      if (auto *I = dyn_cast<Instruction>(Op))
        WorkList.append(I->op_begin(), I->op_end());
    }

    // Compare the cost of the reduction-based lowering against sequential
    // scalar multiply/add pairs.
    int AddOpCode = IsIntVec ? Instruction::Add : Instruction::FAdd;
    int MulOpCode = IsIntVec ? Instruction::Mul : Instruction::FMul;
    InstructionCost ReductionCost =
        TTI.getArithmeticReductionCost(
            AddOpCode, cast<VectorType>(LHS->getType()),
            IsIntVec ? std::nullopt : std::optional(FMF)) +
        TTI.getArithmeticInstrCost(MulOpCode, LHS->getType());
    InstructionCost SequentialAddCost =
        TTI.getArithmeticInstrCost(AddOpCode, ElementType) *
            (LShape.NumColumns - 1) +
        TTI.getArithmeticInstrCost(MulOpCode, ElementType) *
            (LShape.NumColumns);
    if ((LHSCost + ReductionCost - SequentialAddCost) > InstructionCost(0))
      return;

    FusedInsts.insert(MatMul);
    IRBuilder<> Builder(MatMul);
    auto FlattenArg = [&Builder, &FusedInsts, &CanBeFlattened,
                       this](Value *Op) {
      // MatMul must be the only user of loads because we do not use LowerLoad
      // for row vectors (LowerLoad results in scalar loads and shufflevectors
      // instead of a single vector load).
      if (!CanBeFlattened(Op))
        return;

      if (match(Op, m_BinOp())) {
        auto It = ShapeMap.find(Op);
        if (It != ShapeMap.end()) {
          It->second = It->second.t();
          return;
        }
      }

      FusedInsts.insert(cast<Instruction>(Op));
      Value *Arg;
      if (match(Op, m_Intrinsic<Intrinsic::matrix_column_major_load>(
                        m_Value(Arg)))) {
        // Use a regular vector load instead of the column-major load
        // intrinsic.
        auto *NewLoad = Builder.CreateLoad(Op->getType(), Arg);
        Op->replaceAllUsesWith(NewLoad);
        eraseFromParentAndRemoveFromShapeMap(cast<Instruction>(Op));
        return;
      } else if (match(Op, m_Intrinsic<Intrinsic::matrix_transpose>(
                               m_Value(Arg)))) {
        // The transpose is a nop for the flat vector form.
        ToRemove.push_back(cast<Instruction>(Op));
        Op->replaceAllUsesWith(Arg);
        return;
      }
    };

    for (auto *V : ToFlatten)
      FlattenArg(V);

    LHS = MatMul->getArgOperand(0);

    // Insert mul/fmul and llvm.vector.reduce.fadd.
    Value *Mul =
        IsIntVec ? Builder.CreateMul(LHS, RHS) : Builder.CreateFMul(LHS, RHS);

    Value *Result;
    if (IsIntVec)
      Result = Builder.CreateAddReduce(Mul);
    else {
      Result = Builder.CreateFAddReduce(
          ConstantFP::get(cast<VectorType>(LHS->getType())->getElementType(),
                          0.0),
          Mul);
      cast<Instruction>(Result)->setFastMathFlags(FMF);
    }

    // Pack the scalar back into a 1x1 matrix and replace the matmul.
    Result = Builder.CreateInsertElement(PoisonValue::get(MatMul->getType()),
                                         Result, uint64_t(0));
    MatMul->replaceAllUsesWith(Result);
    FusedInsts.insert(MatMul);
    ToRemove.push_back(MatMul);
  }
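  // Example (sketch, hypothetical values): a 1x4 * 4x1 multiply is emitted as
  // an elementwise multiply followed by a vector reduction, roughly
  //
  //   %mul = fmul fast <4 x double> %lhs, %rhs
  //   %dot = call fast double @llvm.vector.reduce.fadd(double 0.0,
  //                                                    <4 x double> %mul)
  //
  // instead of 4 scalar multiply/add pairs; whether this is chosen depends on
  // the TTI costs computed above.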
  /// Compute \p Result += \p A * \p B with left-associating addition.
  ///
  /// A transpose can be folded into the operand that is used to extract
  /// scalars: the first operand for row-major layout, the second for
  /// column-major. If \p IsScalarMatrixTransposed, the corresponding operand
  /// is assumed to be transposed.
  void emitMatrixMultiply(MatrixTy &Result, const MatrixTy &A,
                          const MatrixTy &B, IRBuilder<> &Builder, bool IsTiled,
                          bool IsScalarMatrixTransposed, FastMathFlags FMF) {
    const unsigned VF = std::max<unsigned>(
        TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
                .getFixedValue() /
            Result.getElementType()->getPrimitiveSizeInBits().getFixedValue(),
        1U);
    unsigned R = Result.getNumRows();
    unsigned C = Result.getNumColumns();
    unsigned M = A.getNumColumns();

    bool IsFP = Result.getElementType()->isFloatingPointTy();
    assert(A.isColumnMajor() == B.isColumnMajor() &&
           Result.isColumnMajor() == A.isColumnMajor() &&
           "operands must agree on matrix layout");
    unsigned NumComputeOps = 0;

    Builder.setFastMathFlags(FMF);

    if (A.isColumnMajor()) {
      // Multiply columns from the first operand with scalars from the second
      // operand. Then move along the K axis and accumulate the columns. With
      // this the adds can be vectorized without reassociation.
      for (unsigned J = 0; J < C; ++J) {
        unsigned BlockSize = VF;
        // If Result is zero, we do not need to accumulate in the K == 0
        // iteration.
        bool isSumZero = isa<ConstantAggregateZero>(Result.getColumn(J));

        for (unsigned I = 0; I < R; I += BlockSize) {
          // Gradually lower the vectorization factor to cover the remainder.
          while (I + BlockSize > R)
            BlockSize /= 2;

          Value *Sum = IsTiled ? Result.extractVector(I, J, BlockSize, Builder)
                               : nullptr;
          for (unsigned K = 0; K < M; ++K) {
            Value *L = A.extractVector(I, K, BlockSize, Builder);
            Value *RH = Builder.CreateExtractElement(
                B.getColumn(IsScalarMatrixTransposed ? K : J),
                IsScalarMatrixTransposed ? J : K);
            Value *Splat = Builder.CreateVectorSplat(BlockSize, RH, "splat");
            Sum =
                createMulAdd(isSumZero && K == 0 ? nullptr : Sum, L, Splat,
                             IsFP, Builder, FMF.allowContract(), NumComputeOps);
          }
          Result.setVector(J,
                           insertVector(Result.getVector(J), I, Sum, Builder));
        }
      }
    } else {
      // Multiply rows from the second operand with scalars from the first
      // operand, moving along the K axis and accumulating the rows.
      for (unsigned I = 0; I < R; ++I) {
        unsigned BlockSize = VF;
        bool isSumZero = isa<ConstantAggregateZero>(Result.getRow(I));
        for (unsigned J = 0; J < C; J += BlockSize) {
          // Gradually lower the vectorization factor to cover the remainder.
          while (J + BlockSize > C)
            BlockSize /= 2;

          Value *Sum = nullptr;
          for (unsigned K = 0; K < M; ++K) {
            Value *R = B.extractVector(K, J, BlockSize, Builder);
            Value *LH = Builder.CreateExtractElement(
                A.getVector(IsScalarMatrixTransposed ? K : I),
                IsScalarMatrixTransposed ? I : K);
            Value *Splat = Builder.CreateVectorSplat(BlockSize, LH, "splat");
            Sum =
                createMulAdd(isSumZero && K == 0 ? nullptr : Sum, Splat, R,
                             IsFP, Builder, FMF.allowContract(), NumComputeOps);
          }
          Result.setVector(I,
                           insertVector(Result.getVector(I), J, Sum, Builder));
        }
      }
    }
    Result.addNumComputeOps(NumComputeOps);
  }
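  // Shape of the generated code (sketch, column-major path): for each result
  // column J, rows are processed in blocks of at most VF elements; for each K
  // the block of A's column K is multiplied by the broadcast scalar B[K][J]
  // and accumulated, so one result block costs M multiply-adds on VF-wide
  // vectors. With -matrix-allow-contract these become fmuladd calls.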
  /// Ensure that the memory in \p Load does not alias \p Store by potentially
  /// copying it to a new location. Returns either the new or the original
  /// pointer.
  Value *getNonAliasingPointer(LoadInst *Load, StoreInst *Store,
                               CallInst *MatMul) {
    MemoryLocation StoreLoc = MemoryLocation::get(Store);
    MemoryLocation LoadLoc = MemoryLocation::get(Load);

    // If we can statically determine noalias we're good.
    if (AA->isNoAlias(LoadLoc, StoreLoc))
      return Load->getPointerOperand();

    // Otherwise emit a runtime check: if the locations overlap, copy the
    // loaded memory to a fresh alloca before fusing.
    BasicBlock *Check0 = MatMul->getParent();
    SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
    for (BasicBlock *Succ : successors(Check0))
      DTUpdates.push_back({DT->Delete, Check0, Succ});

    BasicBlock *Check1 =
        SplitBlock(MatMul->getParent(), MatMul->getIterator(), nullptr, LI,
                   nullptr, "alias_cont");
    BasicBlock *Copy =
        SplitBlock(MatMul->getParent(), MatMul->getIterator(), nullptr, LI,
                   nullptr, "copy");
    BasicBlock *Fusion =
        SplitBlock(MatMul->getParent(), MatMul->getIterator(), nullptr, LI,
                   nullptr, "no_alias");

    // Check if the loaded memory location begins before the end of the store
    // location.
    IRBuilder<> Builder(MatMul);
    Check0->getTerminator()->eraseFromParent();
    Builder.SetInsertPoint(Check0);
    Type *IntPtrTy = Builder.getIntPtrTy(DL);
    Value *StoreBegin = Builder.CreatePtrToInt(
        const_cast<Value *>(StoreLoc.Ptr), IntPtrTy, "store.begin");
    Value *StoreEnd = Builder.CreateAdd(
        StoreBegin, ConstantInt::get(IntPtrTy, StoreLoc.Size.getValue()),
        "store.end", true, true);
    Value *LoadBegin = Builder.CreatePtrToInt(const_cast<Value *>(LoadLoc.Ptr),
                                              IntPtrTy, "load.begin");
    Builder.CreateCondBr(Builder.CreateICmpULT(LoadBegin, StoreEnd), Check1,
                         Fusion);

    // Check if the store begins before the end of the load location.
    Check1->getTerminator()->eraseFromParent();
    Builder.SetInsertPoint(Check1, Check1->begin());
    Value *LoadEnd = Builder.CreateAdd(
        LoadBegin, ConstantInt::get(IntPtrTy, LoadLoc.Size.getValue()),
        "load.end", true, true);
    Builder.CreateCondBr(Builder.CreateICmpULT(StoreBegin, LoadEnd), Copy,
                         Fusion);

    // Copy the load operand to a new alloca. Use an array type to avoid
    // potentially huge alignment requirements for large vector types.
    Builder.SetInsertPoint(Copy, Copy->begin());
    auto *VT = cast<FixedVectorType>(Load->getType());
    auto *ArrayTy = ArrayType::get(VT->getElementType(), VT->getNumElements());
    AllocaInst *Alloca =
        Builder.CreateAlloca(ArrayTy, Load->getPointerAddressSpace());

    Builder.CreateMemCpy(Alloca, Alloca->getAlign(), Load->getPointerOperand(),
                         Load->getAlign(), LoadLoc.Size.getValue());
    Builder.SetInsertPoint(Fusion, Fusion->begin());
    PHINode *PHI = Builder.CreatePHI(Load->getPointerOperandType(), 3);
    PHI->addIncoming(Load->getPointerOperand(), Check0);
    PHI->addIncoming(Load->getPointerOperand(), Check1);
    PHI->addIncoming(Alloca, Copy);

    // Adjust the dominator tree.
    DTUpdates.push_back({DT->Insert, Check0, Check1});
    DTUpdates.push_back({DT->Insert, Check0, Fusion});
    DTUpdates.push_back({DT->Insert, Check1, Copy});
    DTUpdates.push_back({DT->Insert, Check1, Fusion});
    DT->applyUpdates(DTUpdates);
    return PHI;
  }
  bool isFusionProfitable(CallInst *MatMul) {
    if (ForceFusion)
      return true;

    ShapeInfo LShape(MatMul->getArgOperand(2), MatMul->getArgOperand(3));
    ShapeInfo RShape(MatMul->getArgOperand(3), MatMul->getArgOperand(4));

    const unsigned R = LShape.NumRows;
    const unsigned C = RShape.NumColumns;
    const unsigned M = LShape.NumColumns;
    auto *EltType = cast<FixedVectorType>(MatMul->getType())->getElementType();

    const unsigned VF = std::max<unsigned>(
        TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
                .getFixedValue() /
            EltType->getPrimitiveSizeInBits().getFixedValue(),
        1U);

    // Cost model for tiling:
    // For tiling to be beneficial, we need reuse either along the R or the C
    // axis. We vectorize along the R axis, so a tall-and-thin result is not
    // worth it.
    if (R <= VF && C == 1)
      return false;
    // We also need the operands to exceed the number of vector registers we
    // have; this is an oversimplification since fusing also introduces extra
    // loads that may exceed the number of reloads it saves.
    unsigned Op0Regs = (R + VF - 1) / VF * M;
    unsigned Op1Regs = (M + VF - 1) / VF * C;
    return Op0Regs + Op1Regs >
           TTI.getNumberOfRegisters(TTI.getRegisterClassForType(true));
  }
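  // Worked example (assuming 4 elements per vector register and 16 vector
  // registers, purely for illustration): for R = C = M = 8,
  // Op0Regs = (8 + 3) / 4 * 8 = 16 and Op1Regs = 16, so 32 > 16 registers and
  // tiling is considered profitable; a small 4x1 result (R <= VF, C == 1) is
  // rejected outright.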
  MatrixTy getZeroMatrix(Type *EltType, unsigned R, unsigned C) {
    MatrixTy Res;
    auto *ColumType = FixedVectorType::get(EltType, R);
    for (unsigned I = 0; I < C; ++I)
      Res.addVector(ConstantAggregateZero::get(ColumType));
    return Res;
  }
  void createTiledLoops(CallInst *MatMul, Value *LPtr, ShapeInfo LShape,
                        Value *RPtr, ShapeInfo RShape, StoreInst *Store) {
    auto *EltType = cast<FixedVectorType>(MatMul->getType())->getElementType();

    // Create the main tiling loop nest.
    TileInfo TI(LShape.NumRows, RShape.NumColumns, LShape.NumColumns, TileSize);
    DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
    Instruction *InsertI = cast<Instruction>(MatMul);
    BasicBlock *Start = InsertI->getParent();
    BasicBlock *End = SplitBlock(InsertI->getParent(), InsertI->getIterator(),
                                 DT, LI, nullptr, "continue");
    IRBuilder<> Builder(MatMul);
    BasicBlock *InnerBody = TI.CreateTiledLoops(Start, End, Builder, DTU, *LI);

    Type *TileVecTy =
        FixedVectorType::get(MatMul->getType()->getScalarType(), TileSize);
    MatrixTy TileResult;
    // Create PHI nodes in the inner loop header to accumulate the result tile
    // across K iterations.
    Builder.SetInsertPoint(TI.KLoop.Header->getTerminator());
    SmallVector<PHINode *, 4> ColumnPhis;
    for (unsigned I = 0; I < TileSize; I++) {
      auto *Phi = Builder.CreatePHI(TileVecTy, 2, "result.vec." + Twine(I));
      Phi->addIncoming(ConstantAggregateZero::get(TileVecTy),
                       TI.RowLoop.Header->getSingleSuccessor());
      TileResult.addVector(Phi);
      ColumnPhis.push_back(Phi);
    }

    // The inner loop body computes Res += A[Row, K] * B[K, Column] on
    // TileSize x TileSize tiles.
    Builder.SetInsertPoint(InnerBody->getTerminator());
    MatrixTy A =
        loadMatrix(LPtr, {}, false, LShape, TI.RowLoop.Index, TI.KLoop.Index,
                   {TileSize, TileSize}, EltType, Builder);
    MatrixTy B =
        loadMatrix(RPtr, {}, false, RShape, TI.KLoop.Index, TI.ColumnLoop.Index,
                   {TileSize, TileSize}, EltType, Builder);
    emitMatrixMultiply(TileResult, A, B, Builder, true, false,
                       getFastMathFlags(MatMul));
    // Store the result tile after the inner (K) loop is done.
    Builder.SetInsertPoint(TI.RowLoop.Latch->getTerminator());
    storeMatrix(TileResult, Store->getPointerOperand(), Store->getAlign(),
                Store->isVolatile(), {LShape.NumRows, RShape.NumColumns},
                TI.RowLoop.Index, TI.ColumnLoop.Index, EltType, Builder);

    for (unsigned I = 0; I < TileResult.getNumVectors(); I++)
      ColumnPhis[I]->addIncoming(TileResult.getVector(I), TI.KLoop.Latch);

    // Force unrolling of a few iterations of the inner loop, to make sure
    // there is enough work per iteration.
    unsigned InnerLoopUnrollCount = std::min(10u, LShape.NumColumns / TileSize);
    addStringMetadataToLoop(LI->getLoopFor(TI.KLoop.Header),
                            "llvm.loop.unroll.count", InnerLoopUnrollCount);
  }
  void emitSIMDTiling(CallInst *MatMul, LoadInst *LoadOp0, LoadInst *LoadOp1,
                      StoreInst *Store,
                      SmallPtrSetImpl<Instruction *> &FusedInsts) {
    assert(MatrixLayout == MatrixLayoutTy::ColumnMajor &&
           "Tiling only supported for column-major matrixes at the moment!");
    if (!isFusionProfitable(MatMul))
      return;

    ShapeInfo LShape(MatMul->getArgOperand(2), MatMul->getArgOperand(3));
    ShapeInfo RShape(MatMul->getArgOperand(3), MatMul->getArgOperand(4));

    const unsigned R = LShape.NumRows;
    const unsigned C = RShape.NumColumns;
    const unsigned M = LShape.NumColumns;
    auto *EltType = cast<FixedVectorType>(MatMul->getType())->getElementType();

    Value *APtr = getNonAliasingPointer(LoadOp0, Store, MatMul);
    Value *BPtr = getNonAliasingPointer(LoadOp1, Store, MatMul);
    Value *CPtr = Store->getPointerOperand();

    if (TileUseLoops && (R % TileSize == 0 && C % TileSize == 0))
      createTiledLoops(MatMul, APtr, LShape, BPtr, RShape, Store);
    else {
      IRBuilder<> Builder(Store);
      for (unsigned J = 0; J < C; J += TileSize)
        for (unsigned I = 0; I < R; I += TileSize) {
          const unsigned TileR = std::min(R - I, unsigned(TileSize));
          const unsigned TileC = std::min(C - J, unsigned(TileSize));
          MatrixTy Res = getZeroMatrix(EltType, TileR, TileC);

          for (unsigned K = 0; K < M; K += TileSize) {
            const unsigned TileM = std::min(M - K, unsigned(TileSize));
            MatrixTy A =
                loadMatrix(APtr, LoadOp0->getAlign(), LoadOp0->isVolatile(),
                           LShape, Builder.getInt64(I), Builder.getInt64(K),
                           {TileR, TileM}, EltType, Builder);
            MatrixTy B =
                loadMatrix(BPtr, LoadOp1->getAlign(), LoadOp1->isVolatile(),
                           RShape, Builder.getInt64(K), Builder.getInt64(J),
                           {TileM, TileC}, EltType, Builder);
            emitMatrixMultiply(Res, A, B, Builder, true, false,
                               getFastMathFlags(MatMul));
          }
          storeMatrix(Res, CPtr, Store->getAlign(), Store->isVolatile(), {R, M},
                      Builder.getInt64(I), Builder.getInt64(J), EltType,
                      Builder);
        }
    }

    // Mark eliminated instructions as fused and remove them.
    FusedInsts.insert(Store);
    FusedInsts.insert(MatMul);
    eraseFromParentAndRemoveFromShapeMap(Store);
    eraseFromParentAndRemoveFromShapeMap(MatMul);
    if (LoadOp0->use_empty()) {
      FusedInsts.insert(LoadOp0);
      eraseFromParentAndRemoveFromShapeMap(LoadOp0);
    }
    if (LoadOp1 != LoadOp0 && LoadOp1->use_empty()) {
      FusedInsts.insert(LoadOp1);
      eraseFromParentAndRemoveFromShapeMap(LoadOp1);
    }
  }
  /// Try to lower matrix multiply chains by fusing operations. Instructions
  /// that are completely eliminated by fusion are added to \p FusedInsts.
  void LowerMatrixMultiplyFused(CallInst *MatMul,
                                SmallPtrSetImpl<Instruction *> &FusedInsts,
                                SmallVector<IntrinsicInst *, 16> &LifetimeEnds) {
    if (!FuseMatrix || !DT)
      return;

    assert(AA && LI && "Analyses should be available");

    Value *A = MatMul->getArgOperand(0);
    Value *B = MatMul->getArgOperand(1);

    // A transpose can be folded into the operand used to fetch scalars.
    Value *T;
    if (MatrixLayout == MatrixLayoutTy::ColumnMajor
            ? match(B, m_Intrinsic<Intrinsic::matrix_transpose>(m_Value(T)))
            : match(A, m_Intrinsic<Intrinsic::matrix_transpose>(m_Value(T)))) {
      IRBuilder<> Builder(MatMul);
      auto *EltType =
          cast<FixedVectorType>(MatMul->getType())->getElementType();
      ShapeInfo LShape(MatMul->getArgOperand(2), MatMul->getArgOperand(3));
      ShapeInfo RShape(MatMul->getArgOperand(3), MatMul->getArgOperand(4));
      const unsigned R = LShape.NumRows;
      const unsigned M = LShape.NumColumns;
      const unsigned C = RShape.NumColumns;

      MatrixTy MA;
      MatrixTy MB;
      Value *Transpose;
      if (MatrixLayout == MatrixLayoutTy::ColumnMajor) {
        MA = getMatrix(A, ShapeInfo(R, M), Builder);
        MB = getMatrix(T, ShapeInfo(C, M), Builder);
        Transpose = B;
      } else {
        MA = getMatrix(T, ShapeInfo(R, M), Builder);
        MB = getMatrix(B, ShapeInfo(C, M), Builder);
        Transpose = A;
      }

      // Initialize the output.
      MatrixTy Result(R, C, EltType);

      emitMatrixMultiply(Result, MA, MB, Builder, false, true,
                         getFastMathFlags(MatMul));

      FusedInsts.insert(MatMul);
      if (Transpose->hasOneUse()) {
        FusedInsts.insert(cast<Instruction>(Transpose));
        ToRemove.push_back(cast<Instruction>(Transpose));
        // Add an entry for the folded transpose so it shows up in the remark
        // expression.
        Inst2ColumnMatrix[Transpose] = MatrixTy(M, C, EltType);
      }
      finalizeLowering(MatMul, Result, Builder);
      return;
    }

    if (!MatMul->hasOneUse() || MatrixLayout != MatrixLayoutTy::ColumnMajor)
      return;

    // Lower {ld, ld} -> matmul -> st chains. No need to call finalizeLowering
    // since the single store user will be lowered as part of this.
    auto *LoadOp0 = dyn_cast<LoadInst>(A);
    auto *LoadOp1 = dyn_cast<LoadInst>(B);
    auto *Store = dyn_cast<StoreInst>(*MatMul->user_begin());
    if (LoadOp0 && LoadOp1 && Store) {
      // The store address must dominate the MatMul instruction, otherwise we
      // create invalid IR. Hoist the address computation if possible.
      SetVector<Value *> WorkList;
      WorkList.insert(Store->getOperand(1));
      SmallVector<Instruction *> ToHoist;
      for (unsigned I = 0; I != WorkList.size(); ++I) {
        Value *Current = WorkList[I];
        auto *CurrI = dyn_cast<Instruction>(Current);
        if (!CurrI)
          continue;
        if (isa<PHINode>(CurrI))
          return;
        if (DT->dominates(CurrI, MatMul))
          continue;
        if (CurrI->mayHaveSideEffects() || CurrI->mayReadFromMemory())
          return;
        ToHoist.push_back(CurrI);
        WorkList.insert_range(CurrI->operands());
      }

      sort(ToHoist, [this](Instruction *A, Instruction *B) {
        return DT->dominates(A, B);
      });
      for (Instruction *I : ToHoist)
        I->moveBefore(MatMul->getIterator());

      // Deal with lifetime.end calls that might sit between the loads and the
      // store: either keep them out of the fused region or give up on fusion
      // so we never introduce loads of dead objects.
      BasicBlock *StoreParent = Store->getParent();
      bool FusableOpsInSameBlock = LoadOp0->getParent() == StoreParent &&
                                   LoadOp1->getParent() == StoreParent;
      for (unsigned Idx = 0; Idx != LifetimeEnds.size();) {
        IntrinsicInst *End = LifetimeEnds[Idx];
        auto Inc = make_scope_exit([&Idx]() { Idx++; });
        // If the lifetime.end is guaranteed to be before the loads or after
        // the store, it does not matter.
        if (DT->dominates(End, LoadOp0) && DT->dominates(End, LoadOp1))
          continue;
        if (DT->dominates(Store, End))
          continue;
        // If all fusable ops are in the same block and the lifetime.end is in
        // a different block, it cannot execute between them.
        if (FusableOpsInSameBlock && End->getParent() != StoreParent)
          continue;

        // If the loads do not alias the ended object, fusion is unaffected.
        MemoryLocation EndLoc = MemoryLocation::getForArgument(End, 1, nullptr);
        if (!EndLoc.Ptr)
          continue;
        MemoryLocation Load0Loc = MemoryLocation::get(LoadOp0);
        MemoryLocation Load1Loc = MemoryLocation::get(LoadOp1);
        if (AA->isNoAlias(Load0Loc, EndLoc) && AA->isNoAlias(Load1Loc, EndLoc))
          continue;

        // If the lifetime.end and the store share a block, sink it after the
        // store; otherwise drop the marker entirely.
        if (End->getParent() == StoreParent) {
          End->moveAfter(Store);
          continue;
        }

        ToRemove.push_back(End);
        std::swap(LifetimeEnds[Idx], LifetimeEnds.back());
        LifetimeEnds.pop_back();
        Inc.release();
      }

      emitSIMDTiling(MatMul, LoadOp0, LoadOp1, Store, FusedInsts);
      return;
    }
  }
  /// Lowers llvm.matrix.multiply.
  MatrixTy LowerMultiply(CallInst *MatMul, IRBuilder<> &Builder) {
    auto *EltType = cast<FixedVectorType>(MatMul->getType())->getElementType();
    ShapeInfo LShape(MatMul->getArgOperand(2), MatMul->getArgOperand(3));
    ShapeInfo RShape(MatMul->getArgOperand(3), MatMul->getArgOperand(4));

    const MatrixTy &Lhs = getMatrix(MatMul->getArgOperand(0), LShape, Builder);
    const MatrixTy &Rhs = getMatrix(MatMul->getArgOperand(1), RShape, Builder);
    assert(Lhs.getElementType() == Rhs.getElementType() &&
           "Matrix multiply argument element types do not match.");

    const unsigned R = LShape.NumRows;
    const unsigned C = RShape.NumColumns;
    assert(LShape.NumColumns == RShape.NumRows);

    // Initialize the output.
    MatrixTy Result(R, C, EltType);
    assert(Lhs.getElementType() == Result.getElementType() &&
           "Matrix multiply result element type does not match arguments.");

    emitMatrixMultiply(Result, Lhs, Rhs, Builder, false, false,
                       getFastMathFlags(MatMul));
    return Result;
  }
  /// Lowers llvm.matrix.transpose.
  MatrixTy LowerTranspose(CallInst *Inst, IRBuilder<> &Builder) {
    MatrixTy Result;
    Value *InputVal = Inst->getArgOperand(0);
    FixedVectorType *VectorTy = cast<FixedVectorType>(InputVal->getType());
    ShapeInfo ArgShape(Inst->getArgOperand(1), Inst->getArgOperand(2));
    MatrixTy InputMatrix = getMatrix(InputVal, ArgShape, Builder);

    const unsigned NewNumVecs =
        InputMatrix.isColumnMajor() ? ArgShape.NumRows : ArgShape.NumColumns;
    const unsigned NewNumElts =
        InputMatrix.isColumnMajor() ? ArgShape.NumColumns : ArgShape.NumRows;

    for (unsigned I = 0; I < NewNumVecs; ++I) {
      // Build a single result vector. First initialize it.
      Value *ResultVector = PoisonValue::get(
          FixedVectorType::get(VectorTy->getElementType(), NewNumElts));
      // Go through the input vectors and insert their I-th element into the
      // result; row and column indices are transposed.
      for (auto J : enumerate(InputMatrix.vectors())) {
        Value *Elt = Builder.CreateExtractElement(J.value(), I);
        ResultVector =
            Builder.CreateInsertElement(ResultVector, Elt, J.index());
      }
      Result.addVector(ResultVector);
    }

    // TODO: Improve the estimate of operations needed for transposes. We
    // currently just count the insertelement/extractelement instructions, and
    // do not account for later simplifications/combines.
    return Result.addNumComputeOps(2 * ArgShape.NumRows * ArgShape.NumColumns)
        .addNumExposedTransposes(1);
  }
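  // Example (sketch): transposing a 2x3 column-major matrix held as three
  // <2 x double> columns produces two <3 x double> result vectors; result
  // vector I is built by extracting element I from every input column, i.e.
  // 6 extract/insert pairs, matching the 2 * 2 * 3 = 12 compute ops counted
  // above.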
  /// Lower load instructions with shape information.
  MatrixTy VisitLoad(LoadInst *Inst, const ShapeInfo &SI, Value *Ptr,
                     IRBuilder<> &Builder) {
    return LowerLoad(Inst, Ptr, Inst->getAlign(),
                     Builder.getInt64(SI.getStride()), Inst->isVolatile(), SI,
                     Builder);
  }

  MatrixTy VisitStore(StoreInst *Inst, const ShapeInfo &SI, Value *StoredVal,
                      Value *Ptr, IRBuilder<> &Builder) {
    return LowerStore(Inst, StoredVal, Ptr, Inst->getAlign(),
                      Builder.getInt64(SI.getStride()), Inst->isVolatile(), SI,
                      Builder);
  }

  /// Lower a PHI with shape information: fill in the incoming values of the
  /// per-vector PHIs that were pre-created in Visit().
  MatrixTy VisitPHI(PHINode *Inst, const ShapeInfo &SI, IRBuilder<> &Builder) {
    auto BlockIP = Inst->getParent()->getFirstInsertionPt();
    Builder.SetInsertPoint(BlockIP);
    MatrixTy PhiM = getMatrix(Inst, SI, Builder);

    for (auto [IncomingV, IncomingB] :
         llvm::zip_equal(Inst->incoming_values(), Inst->blocks())) {
      // getMatrix() may insert instructions to help with reshaping. The safest
      // place for those is at the top of the block, after the other PHIs, or
      // better, right after the incoming value's definition.
      Builder.SetInsertPoint(BlockIP);
      if (auto *IncomingInst = dyn_cast<Instruction>(IncomingV))
        if (auto MaybeIP = IncomingInst->getInsertionPointAfterDef())
          Builder.SetInsertPoint(*MaybeIP);

      MatrixTy OpM = getMatrix(IncomingV, SI, Builder);

      for (unsigned VI = 0, VE = PhiM.getNumVectors(); VI != VE; ++VI) {
        PHINode *NewPHI = cast<PHINode>(PhiM.getVector(VI));
        NewPHI->addIncoming(OpM.getVector(VI), IncomingB);
      }
    }

    // finalizeLowering() may also insert instructions in some cases. The safe
    // place for those is at the end of the initial block of PHIs.
    Builder.SetInsertPoint(BlockIP);
    return PhiM;
  }
  /// Lower binary operators, if shape information is available.
  MatrixTy VisitBinaryOperator(BinaryOperator *Inst, const ShapeInfo &SI,
                               IRBuilder<> &Builder) {
    Value *Lhs = Inst->getOperand(0);
    Value *Rhs = Inst->getOperand(1);

    MatrixTy Result;
    MatrixTy A = getMatrix(Lhs, SI, Builder);
    MatrixTy B = getMatrix(Rhs, SI, Builder);
    assert(A.isColumnMajor() == B.isColumnMajor() &&
           Result.isColumnMajor() == A.isColumnMajor() &&
           "operands must agree on matrix layout");

    Builder.setFastMathFlags(getFastMathFlags(Inst));

    for (unsigned I = 0; I < SI.getNumVectors(); ++I)
      Result.addVector(Builder.CreateBinOp(Inst->getOpcode(), A.getVector(I),
                                           B.getVector(I)));

    return Result.addNumComputeOps(getNumOps(Result.getVectorTy()) *
                                   Result.getNumVectors());
  }
  /// Lower unary operators, if shape information is available.
  MatrixTy VisitUnaryOperator(UnaryOperator *Inst, const ShapeInfo &SI,
                              IRBuilder<> &Builder) {
    Value *Op = Inst->getOperand(0);

    MatrixTy Result;
    MatrixTy M = getMatrix(Op, SI, Builder);

    Builder.setFastMathFlags(getFastMathFlags(Inst));

    // Helper to perform the unary op on vectors.
    auto BuildVectorOp = [&Builder, Inst](Value *Op) {
      switch (Inst->getOpcode()) {
      case Instruction::FNeg:
        return Builder.CreateFNeg(Op);
      default:
        llvm_unreachable("Unsupported unary operator for matrix");
      }
    };

    for (auto *Vector : M.vectors())
      Result.addVector(BuildVectorOp(Vector));

    return Result.addNumComputeOps(getNumOps(Result.getVectorTy()) *
                                   Result.getNumVectors());
  }
  /// Lower cast instructions, if shape information is available.
  MatrixTy VisitCastInstruction(CastInst *Inst, const ShapeInfo &Shape,
                                IRBuilder<> &Builder) {
    Value *Op = Inst->getOperand(0);

    MatrixTy Result;
    MatrixTy M = getMatrix(Op, Shape, Builder);

    Builder.setFastMathFlags(getFastMathFlags(Inst));

    auto *OrigVTy = cast<VectorType>(Inst->getType());
    auto *NewVTy = VectorType::get(OrigVTy->getElementType(),
                                   ElementCount::getFixed(M.getStride()));

    for (auto *Vector : M.vectors())
      Result.addVector(Builder.CreateCast(Inst->getOpcode(), Vector, NewVTy));

    return Result.addNumComputeOps(getNumOps(Result.getVectorTy()) *
                                   Result.getNumVectors());
  }
  /// Lower selects, if shape information is available.
  MatrixTy VisitSelectInst(SelectInst *Inst, const ShapeInfo &Shape,
                           IRBuilder<> &Builder) {
    Value *Cond = Inst->getOperand(0);
    Value *OpA = Inst->getOperand(1);
    Value *OpB = Inst->getOperand(2);

    MatrixTy Result;
    MatrixTy A = getMatrix(OpA, Shape, Builder);
    MatrixTy B = getMatrix(OpB, Shape, Builder);

    SmallVector<Value *> CondV;
    if (isa<FixedVectorType>(Cond->getType())) {
      MatrixTy C = getMatrix(Cond, Shape, Builder);
      llvm::copy(C.vectors(), std::back_inserter(CondV));
    } else {
      CondV.resize(A.getNumVectors());
      std::fill(CondV.begin(), CondV.end(), Cond);
    }

    for (auto &&[CV, AV, BV] : llvm::zip_equal(CondV, A.vectors(), B.vectors()))
      Result.addVector(Builder.CreateSelect(CV, AV, BV));

    return Result.addNumComputeOps(getNumOps(Result.getVectorTy()) *
                                   Result.getNumVectors());
  }
  /// Helper to linearize a matrix expression tree into a string. Currently
  /// used by the remark generator to create readable expressions for remarks.
  struct ExprLinearizer {
    unsigned LengthToBreak = 100;
    std::string Str;
    raw_string_ostream Stream;
    unsigned LineLength = 0;
    const DataLayout &DL;

    /// Mapping from instructions to matrixes. Used to identify matrix
    /// instructions.
    const MapVector<Value *, MatrixTy> &Inst2Matrix;

    /// Mapping from values to the leaves of all expressions the value is part
    /// of.
    const DenseMap<Value *, SmallPtrSet<Value *, 2>> &Shared;

    /// Set of matrix expressions in the scope of a given DISubprogram.
    const SmallSetVector<Value *, 32> &ExprsInSubprogram;

    /// Leaf node of the expression to linearize.
    Value *Leaf;

    /// Keeps track of sub-expressions that get reused while linearizing the
    /// expression. Re-used sub-expressions are marked as (reused).
    SmallPtrSet<Value *, 8> ReusedExprs;

    ExprLinearizer(const DataLayout &DL,
                   const MapVector<Value *, MatrixTy> &Inst2Matrix,
                   const DenseMap<Value *, SmallPtrSet<Value *, 2>> &Shared,
                   const SmallSetVector<Value *, 32> &ExprsInSubprogram,
                   Value *Leaf)
        : Stream(Str), DL(DL), Inst2Matrix(Inst2Matrix), Shared(Shared),
          ExprsInSubprogram(ExprsInSubprogram), Leaf(Leaf) {}

    void indent(unsigned N) {
      LineLength += N;
      for (unsigned i = 0; i < N; i++)
        Stream << " ";
    }

    void lineBreak() {
      Stream << "\n";
      LineLength = 0;
    }

    void maybeIndent(unsigned Indent) {
      if (LineLength >= LengthToBreak)
        lineBreak();

      if (LineLength == 0)
        indent(Indent);
    }

    void write(StringRef S) {
      LineLength += S.size();
      Stream << S;
    }

    Value *getUnderlyingObjectThroughLoads(Value *V) {
      if (Value *Ptr = getPointerOperand(V))
        return getUnderlyingObjectThroughLoads(Ptr);
      else if (V->getType()->isPointerTy())
        return getUnderlyingObject(V);
      return V;
    }

    /// Returns true if \p V is a matrix value in the given subprogram.
    bool isMatrix(Value *V) const { return ExprsInSubprogram.count(V); }

    /// If \p V is a matrix value, print its shape as NumRows x NumColumns to
    /// \p SS.
    void prettyPrintMatrixType(Value *V, raw_string_ostream &SS) {
      auto M = Inst2Matrix.find(V);
      if (M == Inst2Matrix.end())
        SS << "unknown";
      else {
        SS << M->second.getNumRows();
        SS << "x";
        SS << M->second.getNumColumns();
      }
    }

    /// Write the called function name. Handles calls to llvm.matrix.*
    /// specially: we write the name, followed by the dimensions of the input
    /// matrixes, followed by the scalar type name.
    void writeFnName(CallInst *CI) {
      if (!CI->getCalledFunction())
        write("<no called fn>");
      else {
        StringRef Name = CI->getCalledFunction()->getName();
        if (!Name.starts_with("llvm.matrix")) {
          write(Name);
          return;
        }
        auto *II = cast<IntrinsicInst>(CI);
        write(Intrinsic::getBaseName(II->getIntrinsicID())
                  .drop_front(StringRef("llvm.matrix.").size()));
        write(".");
        std::string Tmp;
        raw_string_ostream SS(Tmp);

        switch (II->getIntrinsicID()) {
        case Intrinsic::matrix_multiply:
          prettyPrintMatrixType(II->getOperand(0), SS);
          SS << ".";
          prettyPrintMatrixType(II->getOperand(1), SS);
          SS << "." << *II->getType()->getScalarType();
          break;
        case Intrinsic::matrix_transpose:
          prettyPrintMatrixType(II->getOperand(0), SS);
          SS << "." << *II->getType()->getScalarType();
          break;
        case Intrinsic::matrix_column_major_load:
          prettyPrintMatrixType(II, SS);
          SS << "." << *II->getType()->getScalarType();
          break;
        case Intrinsic::matrix_column_major_store:
          prettyPrintMatrixType(II->getOperand(0), SS);
          SS << "." << *II->getOperand(0)->getType()->getScalarType();
          break;
        default:
          llvm_unreachable("Unhandled case");
        }
        write(Tmp);
      }
    }

    unsigned getNumShapeArgs(CallInst *CI) const {
      if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
        switch (II->getIntrinsicID()) {
        case Intrinsic::matrix_multiply:
          return 3;
        case Intrinsic::matrix_transpose:
          return 2;
        case Intrinsic::matrix_column_major_load:
        case Intrinsic::matrix_column_major_store:
          return 3;
        default:
          return 0;
        }
      }
      return 0;
    }

    /// Special printing for values: for pointers, print whether they refer to
    /// an external or a stack address; for other values, print the constant or
    /// "scalar"/"matrix".
    void write(Value *V) {
      V = getUnderlyingObjectThroughLoads(V);
      if (V->getType()->isPointerTy()) {
        if (isa<AllocaInst>(V)) {
          Stream << "stack addr";
          LineLength += StringRef("stack addr").size();
        } else {
          Stream << "addr";
          LineLength += StringRef("addr").size();
        }
        if (!V->getName().empty()) {
          Stream << " %" << V->getName() << "";
          LineLength += V->getName().size() + 2;
        }
        return;
      }

      std::string Tmp;
      raw_string_ostream TmpStream(Tmp);

      if (auto *CI = dyn_cast<ConstantInt>(V))
        TmpStream << CI->getValue();
      else if (isa<Constant>(V))
        TmpStream << "constant";
      else {
        if (isMatrix(V))
          TmpStream << "matrix";
        else
          TmpStream << "scalar";
      }
      Tmp = std::string(StringRef(Tmp).trim());
      LineLength += Tmp.size();
      Stream << Tmp;
    }

    /// Linearize expression \p Expr starting at an indentation of \p Indent.
    /// Expressions that are re-used multiple times are prefixed with (reused)
    /// at the re-used root instruction.
    void linearizeExpr(Value *Expr, unsigned Indent, bool ParentReused,
                       bool ParentShared) {
      auto *I = cast<Instruction>(Expr);
      maybeIndent(Indent);
      SmallVector<Value *, 8> Ops;

      // Is Expr shared with other expression leaves?
      bool ExprShared = false;

      // Deal with shared subtrees. Mark them as shared, if required.
      if (!ParentShared) {
        auto SI = Shared.find(Expr);
        assert(SI != Shared.end() && SI->second.count(Leaf));

        for (Value *S : SI->second) {
          if (S == Leaf)
            continue;
          DebugLoc DL = cast<Instruction>(S)->getDebugLoc();
          write("shared with remark at line " + std::to_string(DL.getLine()) +
                " column " + std::to_string(DL.getCol()) + " (");
        }
        ExprShared = SI->second.size() > 1;
      }

      bool Reused = !ReusedExprs.insert(Expr).second;
      if (Reused && !ParentReused)
        write("(reused) ");

      if (auto *CI = dyn_cast<CallInst>(I)) {
        writeFnName(CI);
        Ops.append(CI->arg_begin(), CI->arg_end() - getNumShapeArgs(CI));
      } else if (isa<BitCastInst>(Expr)) {
        // Special case bitcasts, which are used to materialize matrixes from
        // non-matrix ops.
        write("matrix");
        return;
      } else {
        Ops.append(I->value_op_begin(), I->value_op_end());
        write(I->getOpcodeName());
      }

      write("(");

      unsigned NumOpsToBreak = 1;
      if (match(Expr, m_Intrinsic<Intrinsic::matrix_column_major_load>()))
        NumOpsToBreak = 2;

      for (Value *Op : Ops) {
        if (Ops.size() > NumOpsToBreak)
          lineBreak();

        maybeIndent(Indent + 1);
        if (isMatrix(Op))
          linearizeExpr(Op, Indent + 1, Reused, ExprShared);
        else
          write(Op);
        if (Op != Ops.back())
          write(", ");
      }

      write(")");
    }

    const std::string &getResult() { return Str; }
  };
  /// Generate remarks for matrix operations in a function. To do so:
  /// 1. Use the inlined-at debug information to group matrix operations into
  ///    the DISubprograms they are contained in.
  /// 2. Collect the leaves of matrix expressions (getExpressionLeaves).
  /// 3. For each leaf, emit a remark containing a linearized version of the
  ///    matrix expression.
  struct RemarkGenerator {
    const MapVector<Value *, MatrixTy> &Inst2Matrix;
    OptimizationRemarkEmitter &ORE;
    Function &Func;
    const DataLayout &DL;

    RemarkGenerator(const MapVector<Value *, MatrixTy> &Inst2Matrix,
                    OptimizationRemarkEmitter &ORE, Function &Func)
        : Inst2Matrix(Inst2Matrix), ORE(ORE), Func(Func),
          DL(Func.getDataLayout()) {}

    /// Return all leaves of the expressions in \p ExprsInSubprogram. Those are
    /// instructions in Inst2Matrix returning void or without any users in
    /// \p ExprsInSubprogram; currently that should only include stores.
    SmallVector<Value *, 4>
    getExpressionLeaves(const SmallSetVector<Value *, 32> &ExprsInSubprogram) {
      SmallVector<Value *, 4> Leaves;
      for (auto *Expr : ExprsInSubprogram)
        if (Expr->getType()->isVoidTy() ||
            !any_of(Expr->users(), [&ExprsInSubprogram](User *U) {
              return ExprsInSubprogram.count(U);
            }))
          Leaves.push_back(Expr);
      return Leaves;
    }

    /// Recursively traverse expression \p V starting at \p Leaf and add
    /// \p Leaf to the list of expressions \p V is part of.
    void collectSharedInfo(Value *Leaf, Value *V,
                           const SmallSetVector<Value *, 32> &ExprsInSubprogram,
                           DenseMap<Value *, SmallPtrSet<Value *, 2>> &Shared) {
      if (!ExprsInSubprogram.count(V))
        return;

      Shared[V].insert(Leaf);

      for (Value *Op : cast<Instruction>(V)->operand_values())
        collectSharedInfo(Leaf, Op, ExprsInSubprogram, Shared);
    }

    /// Calculate the exclusive and shared op counts for the expression rooted
    /// at \p Root. Expressions used multiple times are counted once.
    std::pair<OpInfoTy, OpInfoTy>
    sumOpInfos(Value *Root, SmallPtrSetImpl<Value *> &ReusedExprs,
               const SmallSetVector<Value *, 32> &ExprsInSubprogram,
               DenseMap<Value *, SmallPtrSet<Value *, 2>> &Shared) const {
      if (!ExprsInSubprogram.count(Root))
        return {};

      // Already counted this expression. Stop.
      if (!ReusedExprs.insert(Root).second)
        return {};

      OpInfoTy SharedCount;
      OpInfoTy Count;

      auto I = Shared.find(Root);
      auto CM = Inst2Matrix.find(Root);
      if (I->second.size() == 1)
        Count = CM->second.getOpInfo();
      else
        SharedCount = CM->second.getOpInfo();

      for (Value *Op : cast<Instruction>(Root)->operand_values()) {
        auto C = sumOpInfos(Op, ReusedExprs, ExprsInSubprogram, Shared);
        Count += C.first;
        SharedCount += C.second;
      }
      return {Count, SharedCount};
    }

    void emitRemarks() {
      // Map matrix values to their containing subprograms by walking the
      // inlined-at chain. If the function has no DISubprogram, map them to the
      // nullptr key.
      MapVector<DISubprogram *, SmallVector<Value *, 8>> Subprog2Exprs;
      for (const auto &KV : Inst2Matrix) {
        if (Func.getSubprogram()) {
          auto *I = cast<Instruction>(KV.first);
          DILocation *Context = I->getDebugLoc();
          while (Context) {
            Subprog2Exprs[getSubprogram(Context->getScope())].push_back(
                KV.first);
            Context = DebugLoc(Context).getInlinedAt();
          }
        } else {
          Subprog2Exprs[nullptr].push_back(KV.first);
        }
      }
      for (auto &KV : Subprog2Exprs) {
        SmallSetVector<Value *, 32> ExprsInSubprogram(KV.second.begin(),
                                                      KV.second.end());
        auto Leaves = getExpressionLeaves(ExprsInSubprogram);

        DenseMap<Value *, SmallPtrSet<Value *, 2>> Shared;
        for (Value *Leaf : Leaves)
          collectSharedInfo(Leaf, Leaf, ExprsInSubprogram, Shared);

        // Generate remarks for each leaf.
        for (auto *L : Leaves) {
          DebugLoc Loc = cast<Instruction>(L)->getDebugLoc();
          // ... (walk the inlined-at chain to find the location inside the
          // current subprogram)

          SmallPtrSet<Value *, 8> ReusedExprs;
          OpInfoTy Counts, SharedCounts;
          std::tie(Counts, SharedCounts) =
              sumOpInfos(L, ReusedExprs, ExprsInSubprogram, Shared);

          OptimizationRemark Rem(DEBUG_TYPE, "matrix-lowered", Loc,
                                 cast<Instruction>(L)->getParent());

          Rem << "Lowered with ";
          Rem << ore::NV("NumStores", Counts.NumStores) << " stores, "
              << ore::NV("NumLoads", Counts.NumLoads) << " loads, "
              << ore::NV("NumComputeOps", Counts.NumComputeOps)
              << " compute ops, "
              << ore::NV("NumExposedTransposes", Counts.NumExposedTransposes)
              << " exposed transposes";

          if (SharedCounts.NumStores > 0 || SharedCounts.NumLoads > 0 ||
              SharedCounts.NumComputeOps > 0) {
            Rem << ",\nadditionally "
                << ore::NV("NumStores", SharedCounts.NumStores) << " stores, "
                << ore::NV("NumLoads", SharedCounts.NumLoads) << " loads, "
                << ore::NV("NumFPOps", SharedCounts.NumComputeOps)
                << " compute ops"
                << " are shared with other expressions";
          }

          Rem << ("\n" + linearize(L, Shared, ExprsInSubprogram, DL));
          ORE.emit(Rem);
        }
      }
    }

    std::string
    linearize(Value *L,
              const DenseMap<Value *, SmallPtrSet<Value *, 2>> &Shared,
              const SmallSetVector<Value *, 32> &ExprsInSubprogram,
              const DataLayout &DL) {
      ExprLinearizer Lin(DL, Inst2Matrix, Shared, ExprsInSubprogram, L);
      Lin.linearizeExpr(L, 0, false, false);
      return Lin.getResult();
    }
  };
};
} // namespace
PreservedAnalyses LowerMatrixIntrinsicsPass::run(Function &F,
                                                 FunctionAnalysisManager &AM) {
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  LowerMatrixIntrinsics LMT(F, TTI, Minimal ? nullptr : &AM);
  if (LMT.Visit()) {
    PreservedAnalyses PA;
    if (!Minimal) {
      PA.preserve<LoopAnalysis>();
      PA.preserve<DominatorTreeAnalysis>();
    }
    return PA;
  }
  return PreservedAnalyses::all();
}

void LowerMatrixIntrinsicsPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  auto ClassName = MapClassName2PassName(name());
  OS << ClassName;
  if (Minimal)
    OS << "<minimal>";
}
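// Usage sketch (assumption, not part of this file): the pass is registered
// with the new pass manager as "lower-matrix-intrinsics" and can be exercised
// directly, e.g.
//
//   opt -passes=lower-matrix-intrinsics -S input.ll
//
// The "<minimal>" variant printed by printPipeline above corresponds to the
// late, analysis-free run used in the backend pipelines.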